Ticket #1456: string-iter-remove-old-api-v6.diff
File string-iter-remove-old-api-v6.diff, 27.1 KB (added by nwellnhof, 12 years ago)
-
include/parrot/encoding.h
diff --git a/include/parrot/encoding.h b/include/parrot/encoding.h index 75055b8..67d2866 100644
a b 30 30 31 31 struct string_iterator_t; /* s. parrot/string.h */ 32 32 33 typedef void (*encoding_iter_init_t)(PARROT_INTERP, const STRING *src,34 struct string_iterator_t *);35 33 typedef UINTVAL (*encoding_iter_get_t)( 36 34 PARROT_INTERP, const STRING *str, const String_iter *i, INTVAL offset); 37 35 typedef void (*encoding_iter_skip_t)( … … 54 52 encoding_get_bytes_t get_bytes; 55 53 encoding_codepoints_t codepoints; 56 54 encoding_bytes_t bytes; 57 encoding_iter_init_t iter_init;58 55 encoding_find_cclass_t find_cclass; 59 56 encoding_hash_t hash; 60 57 encoding_iter_get_t iter_get; … … 223 220 ((src)->encoding)->codepoints((i), (src)) 224 221 #define ENCODING_BYTES(i, src) \ 225 222 ((src)->encoding)->bytes((i), (src)) 226 #define ENCODING_ITER_INIT(i, src, iter) \227 ((src)->encoding)->iter_init((i), (src), (iter))228 223 #define ENCODING_FIND_CCLASS(i, src, typetable, flags, pos, end) \ 229 224 ((src)->encoding)->find_cclass((i), (src), (typetable), (flags), (pos), (end)) 230 225 #define ENCODING_HASH(i, src, seed) \ -
include/parrot/string.h
diff --git a/include/parrot/string.h b/include/parrot/string.h index 7d87f8e..ddd9254 100644
a b 29 29 30 30 /* String iterator */ 31 31 typedef struct string_iterator_t { 32 const STRING *str;33 32 UINTVAL bytepos; 34 33 UINTVAL charpos; 35 UINTVAL (*get_and_advance)(PARROT_INTERP, struct string_iterator_t *i);36 void (*set_and_advance)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL c);37 void (*set_position)(PARROT_INTERP, struct string_iterator_t *i, UINTVAL pos);38 34 } String_iter; 39 35 40 36 #define STRING_ITER_INIT(i, iter) \ -
src/string/encoding/fixed_8.c
diff --git a/src/string/encoding/fixed_8.c b/src/string/encoding/fixed_8.c index 13448f4..7733d87 100644
a b 41 41 __attribute__nonnull__(2) 42 42 __attribute__nonnull__(3); 43 43 44 static UINTVAL fixed8_get_next(PARROT_INTERP, ARGMOD(String_iter *iter))45 __attribute__nonnull__(1)46 __attribute__nonnull__(2)47 FUNC_MODIFIES(*iter);48 49 44 static UINTVAL fixed8_iter_get(PARROT_INTERP, 50 45 ARGIN(const STRING *str), 51 46 ARGIN(const String_iter *iter), … … 88 83 __attribute__nonnull__(3) 89 84 FUNC_MODIFIES(*iter); 90 85 91 static void fixed8_set_next(PARROT_INTERP,92 ARGMOD(String_iter *iter),93 UINTVAL c)94 __attribute__nonnull__(1)95 __attribute__nonnull__(2)96 FUNC_MODIFIES(*iter);97 98 static void fixed8_set_position(SHIM_INTERP,99 ARGMOD(String_iter *iter),100 UINTVAL pos)101 __attribute__nonnull__(2)102 FUNC_MODIFIES(*iter);103 104 86 static size_t fixed_8_hash(SHIM_INTERP, 105 87 ARGIN(const STRING *s), 106 88 size_t hashval) … … 137 119 __attribute__nonnull__(1) 138 120 __attribute__nonnull__(2); 139 121 140 static void iter_init(SHIM_INTERP,141 ARGIN(const STRING *src),142 ARGOUT(String_iter *iter))143 __attribute__nonnull__(2)144 __attribute__nonnull__(3)145 FUNC_MODIFIES(*iter);146 147 122 static void set_byte(PARROT_INTERP, 148 123 ARGIN(const STRING *source_string), 149 124 UINTVAL offset, … … 164 139 #define ASSERT_ARGS_find_cclass __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 165 140 PARROT_ASSERT_ARG(s) \ 166 141 , PARROT_ASSERT_ARG(typetable)) 167 #define ASSERT_ARGS_fixed8_get_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\168 PARROT_ASSERT_ARG(interp) \169 , PARROT_ASSERT_ARG(iter))170 142 #define ASSERT_ARGS_fixed8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 171 143 PARROT_ASSERT_ARG(interp) \ 172 144 , PARROT_ASSERT_ARG(str) \ … … 185 157 #define ASSERT_ARGS_fixed8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 186 158 PARROT_ASSERT_ARG(str) \ 187 159 , PARROT_ASSERT_ARG(iter)) 188 #define ASSERT_ARGS_fixed8_set_next __attribute__unused__ int _ASSERT_ARGS_CHECK = (\189 PARROT_ASSERT_ARG(interp) \190 , 
PARROT_ASSERT_ARG(iter))191 #define ASSERT_ARGS_fixed8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\192 PARROT_ASSERT_ARG(iter))193 160 #define ASSERT_ARGS_fixed_8_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 194 161 PARROT_ASSERT_ARG(s)) 195 162 #define ASSERT_ARGS_get_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 203 170 #define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 204 171 PARROT_ASSERT_ARG(interp) \ 205 172 , PARROT_ASSERT_ARG(source_string)) 206 #define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\207 PARROT_ASSERT_ARG(src) \208 , PARROT_ASSERT_ARG(iter))209 173 #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 210 174 PARROT_ASSERT_ARG(interp) \ 211 175 , PARROT_ASSERT_ARG(source_string)) … … 540 504 541 505 /* 542 506 543 =item C<static UINTVAL fixed8_get_next(PARROT_INTERP, String_iter *iter)>544 545 Moves the string iterator C<i> to the next codepoint.546 547 =cut548 549 */550 551 static UINTVAL552 fixed8_get_next(PARROT_INTERP, ARGMOD(String_iter *iter))553 {554 ASSERT_ARGS(fixed8_get_next)555 const UINTVAL c = get_byte(interp, iter->str, iter->charpos++);556 iter->bytepos++;557 return c;558 }559 560 /*561 562 =item C<static void fixed8_set_next(PARROT_INTERP, String_iter *iter, UINTVAL563 c)>564 565 With the string iterator C<i>, appends the codepoint C<c> and advances to the566 next position in the string.567 568 =cut569 570 */571 572 static void573 fixed8_set_next(PARROT_INTERP, ARGMOD(String_iter *iter), UINTVAL c)574 {575 ASSERT_ARGS(fixed8_set_next)576 set_byte(interp, iter->str, iter->charpos++, c);577 iter->bytepos++;578 }579 580 /*581 582 =item C<static void fixed8_set_position(PARROT_INTERP, String_iter *iter,583 UINTVAL pos)>584 585 Moves the string iterator C<i> to the position C<n> in the string.586 587 =cut588 589 */590 591 static void592 fixed8_set_position(SHIM_INTERP, ARGMOD(String_iter *iter), UINTVAL 
pos)593 {594 ASSERT_ARGS(fixed8_set_position)595 iter->bytepos = iter->charpos = pos;596 PARROT_ASSERT(pos <= Buffer_buflen(iter->str));597 }598 599 600 /*601 602 =item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter603 *iter)>604 605 Initializes for string C<src> the string iterator C<iter>.606 607 =cut608 609 */610 611 static void612 iter_init(SHIM_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))613 {614 ASSERT_ARGS(iter_init)615 iter->str = src;616 iter->bytepos = iter->charpos = 0;617 iter->get_and_advance = fixed8_get_next;618 iter->set_and_advance = fixed8_set_next;619 iter->set_position = fixed8_set_position;620 }621 622 623 /*624 625 507 =item C<static size_t fixed_8_hash(PARROT_INTERP, const STRING *s, size_t 626 508 hashval)> 627 509 … … 674 556 get_bytes, 675 557 codepoints, 676 558 bytes, 677 iter_init,678 559 find_cclass, 679 560 fixed_8_hash, 680 561 fixed8_iter_get, -
src/string/encoding/ucs2.c
diff --git a/src/string/encoding/ucs2.c b/src/string/encoding/ucs2.c index 5136d16..3b60fde 100644
a b 84 84 __attribute__nonnull__(1) 85 85 __attribute__nonnull__(2); 86 86 87 static void iter_init(PARROT_INTERP,88 ARGIN(const STRING *src),89 ARGOUT(String_iter *iter))90 __attribute__nonnull__(1)91 __attribute__nonnull__(2)92 __attribute__nonnull__(3)93 FUNC_MODIFIES(*iter);94 95 87 static void set_byte(PARROT_INTERP, 96 88 SHIM(const STRING *src), 97 89 SHIM(UINTVAL offset), … … 104 96 __attribute__nonnull__(1) 105 97 __attribute__nonnull__(2); 106 98 107 static UINTVAL ucs2_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i))108 __attribute__nonnull__(2)109 FUNC_MODIFIES(*i);110 111 static void ucs2_encode_and_advance(SHIM_INTERP,112 ARGMOD(String_iter *i),113 UINTVAL c)114 __attribute__nonnull__(2)115 FUNC_MODIFIES(*i);116 117 99 static size_t ucs2_hash(PARROT_INTERP, 118 100 ARGIN(const STRING *s), 119 101 size_t hashval) … … 164 146 __attribute__nonnull__(3) 165 147 FUNC_MODIFIES(*i); 166 148 167 static void ucs2_set_position(SHIM_INTERP,168 ARGMOD(String_iter *i),169 UINTVAL n)170 __attribute__nonnull__(2)171 FUNC_MODIFIES(*i);172 173 149 #define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 174 150 PARROT_ASSERT_ARG(src)) 175 151 #define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 189 165 #define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 190 166 PARROT_ASSERT_ARG(interp) \ 191 167 , PARROT_ASSERT_ARG(src)) 192 #define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\193 PARROT_ASSERT_ARG(interp) \194 , PARROT_ASSERT_ARG(src) \195 , PARROT_ASSERT_ARG(iter))196 168 #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 197 169 PARROT_ASSERT_ARG(interp)) 198 170 #define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 199 171 PARROT_ASSERT_ARG(interp) \ 200 172 , PARROT_ASSERT_ARG(src)) 201 #define ASSERT_ARGS_ucs2_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\202 
PARROT_ASSERT_ARG(i))203 #define ASSERT_ARGS_ucs2_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\204 PARROT_ASSERT_ARG(i))205 173 #define ASSERT_ARGS_ucs2_hash __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 206 174 PARROT_ASSERT_ARG(interp) \ 207 175 , PARROT_ASSERT_ARG(s)) … … 225 193 PARROT_ASSERT_ARG(interp) \ 226 194 , PARROT_ASSERT_ARG(str) \ 227 195 , PARROT_ASSERT_ARG(i)) 228 #define ASSERT_ARGS_ucs2_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\229 PARROT_ASSERT_ARG(i))230 196 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ 231 197 /* HEADERIZER END: static */ 232 198 … … 609 575 610 576 /* 611 577 612 =item C<static UINTVAL ucs2_decode_and_advance(PARROT_INTERP, String_iter *i)>613 614 Moves the string iterator C<i> to the next UCS-2 codepoint.615 616 =cut617 618 */619 620 static UINTVAL621 ucs2_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i))622 {623 ASSERT_ARGS(ucs2_decode_and_advance)624 625 #if PARROT_HAS_ICU626 const UChar * const s = (const UChar*) i->str->strstart;627 size_t pos = i->bytepos / sizeof (UChar);628 629 /* TODO either make sure that we don't go past end or use SAFE630 * iter versions631 */632 const UChar c = s[pos++];633 i->charpos++;634 i->bytepos = pos * sizeof (UChar);635 return c;636 #else637 /* This function must never be called if compiled without ICU.638 * See TT #557639 */640 PARROT_ASSERT(0);641 UNUSED(i);642 return (UINTVAL)0; /* Stop the static analyzers from panicing */643 #endif644 }645 646 /*647 648 =item C<static void ucs2_encode_and_advance(PARROT_INTERP, String_iter *i,649 UINTVAL c)>650 651 With the string iterator C<i>, appends the codepoint C<c> and advances to the652 next position in the string.653 654 =cut655 656 */657 658 static void659 ucs2_encode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL c)660 {661 ASSERT_ARGS(ucs2_encode_and_advance)662 663 #if PARROT_HAS_ICU664 UChar *s = (UChar*) i->str->strstart;665 UINTVAL 
pos = i->bytepos / sizeof (UChar);666 s[pos++] = (UChar)c;667 i->charpos++;668 i->bytepos = pos * sizeof (UChar);669 #else670 /* This function must never be called if compiled without ICU.671 * See TT #557672 */673 UNUSED(i);674 UNUSED(c);675 PARROT_ASSERT(0);676 #endif677 }678 679 /*680 681 578 =item C<static size_t ucs2_hash(PARROT_INTERP, const STRING *s, size_t hashval)> 682 579 683 580 Returns the hashed value of the string, given a seed in hashval. … … 710 607 #endif 711 608 } 712 609 713 714 /*715 716 =item C<static void ucs2_set_position(PARROT_INTERP, String_iter *i, UINTVAL n)>717 718 Moves the string iterator C<i> to the position C<n> in the string.719 720 =cut721 722 */723 724 static void725 ucs2_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL n)726 {727 ASSERT_ARGS(ucs2_set_position)728 729 #if PARROT_HAS_ICU730 i->charpos = n;731 i->bytepos = n * sizeof (UChar);732 #else733 /* This function must never be called if compiled without ICU.734 * See TT #557735 */736 UNUSED(i);737 UNUSED(n);738 PARROT_ASSERT(0);739 #endif740 }741 742 743 /*744 745 =item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter746 *iter)>747 748 Initializes for string C<src> the string iterator C<iter>.749 750 =cut751 752 */753 754 static void755 iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))756 {757 ASSERT_ARGS(iter_init)758 #if PARROT_HAS_ICU759 UNUSED(interp);760 iter->str = src;761 iter->bytepos = 0;762 iter->charpos = 0;763 iter->get_and_advance = ucs2_decode_and_advance;764 iter->set_and_advance = ucs2_encode_and_advance;765 iter->set_position = ucs2_set_position;766 #else767 UNUSED(src);768 UNUSED(iter);769 no_ICU_lib(interp);770 #endif771 }772 773 610 /* 774 611 775 612 =item C<void Parrot_encoding_ucs2_init(PARROT_INTERP)> … … 797 634 get_bytes, 798 635 codepoints, 799 636 bytes, 800 iter_init,801 637 find_cclass, 802 638 ucs2_hash, 803 639 ucs2_iter_get, -
src/string/encoding/utf16.c
diff --git a/src/string/encoding/utf16.c b/src/string/encoding/utf16.c index 6df6e96..73fa10f 100644
a b 75 75 __attribute__nonnull__(1) 76 76 __attribute__nonnull__(2); 77 77 78 static void iter_init(PARROT_INTERP,79 ARGIN(const STRING *src),80 ARGOUT(String_iter *iter))81 __attribute__nonnull__(1)82 __attribute__nonnull__(2)83 __attribute__nonnull__(3)84 FUNC_MODIFIES(*iter);85 86 78 static void set_byte(PARROT_INTERP, 87 79 ARGIN(const STRING *src), 88 80 UINTVAL offset, … … 96 88 __attribute__nonnull__(1) 97 89 __attribute__nonnull__(2); 98 90 99 PARROT_WARN_UNUSED_RESULT100 static UINTVAL utf16_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i))101 __attribute__nonnull__(2)102 FUNC_MODIFIES(*i);103 104 static void utf16_encode_and_advance(SHIM_INTERP,105 ARGMOD(String_iter *i),106 UINTVAL c)107 __attribute__nonnull__(2)108 FUNC_MODIFIES(*i);109 110 91 static UINTVAL utf16_iter_get(PARROT_INTERP, 111 92 ARGIN(const STRING *str), 112 93 ARGIN(const String_iter *i), … … 152 133 __attribute__nonnull__(3) 153 134 FUNC_MODIFIES(*i); 154 135 155 static void utf16_set_position(SHIM_INTERP,156 ARGMOD(String_iter *i),157 UINTVAL n)158 __attribute__nonnull__(2)159 FUNC_MODIFIES(*i);160 161 136 #define ASSERT_ARGS_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 162 137 PARROT_ASSERT_ARG(src)) 163 138 #define ASSERT_ARGS_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 178 153 #define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 179 154 PARROT_ASSERT_ARG(interp) \ 180 155 , PARROT_ASSERT_ARG(src)) 181 #define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\182 PARROT_ASSERT_ARG(interp) \183 , PARROT_ASSERT_ARG(src) \184 , PARROT_ASSERT_ARG(iter))185 156 #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 186 157 PARROT_ASSERT_ARG(interp) \ 187 158 , PARROT_ASSERT_ARG(src)) 188 159 #define ASSERT_ARGS_to_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 189 160 PARROT_ASSERT_ARG(interp) \ 190 161 , PARROT_ASSERT_ARG(src)) 191 #define 
ASSERT_ARGS_utf16_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\192 PARROT_ASSERT_ARG(i))193 #define ASSERT_ARGS_utf16_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\194 PARROT_ASSERT_ARG(i))195 162 #define ASSERT_ARGS_utf16_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 196 163 PARROT_ASSERT_ARG(interp) \ 197 164 , PARROT_ASSERT_ARG(str) \ … … 212 179 PARROT_ASSERT_ARG(interp) \ 213 180 , PARROT_ASSERT_ARG(str) \ 214 181 , PARROT_ASSERT_ARG(i)) 215 #define ASSERT_ARGS_utf16_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\216 PARROT_ASSERT_ARG(i))217 182 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ 218 183 /* HEADERIZER END: static */ 219 184 … … 717 682 #endif 718 683 } 719 684 720 #if PARROT_HAS_ICU721 /*722 723 =item C<static UINTVAL utf16_decode_and_advance(PARROT_INTERP, String_iter *i)>724 725 Moves the string iterator C<i> to the next UTF-16 codepoint.726 727 =cut728 729 */730 731 PARROT_WARN_UNUSED_RESULT732 static UINTVAL733 utf16_decode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i))734 {735 ASSERT_ARGS(utf16_decode_and_advance)736 const UChar * const s = (const UChar*) i->str->strstart;737 UINTVAL pos = i->bytepos / sizeof (UChar);738 UINTVAL c;739 740 /* TODO either make sure that we don't go past end or use SAFE741 * iter versions742 */743 U16_NEXT_UNSAFE(s, pos, c);744 i->charpos++;745 i->bytepos = pos * sizeof (UChar);746 return c;747 }748 749 /*750 751 =item C<static void utf16_encode_and_advance(PARROT_INTERP, String_iter *i,752 UINTVAL c)>753 754 With the string iterator C<i>, appends the codepoint C<c> and advances to the755 next position in the string.756 757 =cut758 759 */760 761 static void762 utf16_encode_and_advance(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL c)763 {764 ASSERT_ARGS(utf16_encode_and_advance)765 UChar * const s = (UChar*) i->str->strstart;766 UINTVAL pos = i->bytepos / sizeof (UChar);767 U16_APPEND_UNSAFE(s, pos, 
c);768 i->charpos++;769 i->bytepos = pos * sizeof (UChar);770 }771 772 /*773 774 =item C<static void utf16_set_position(PARROT_INTERP, String_iter *i, UINTVAL775 n)>776 777 Moves the string iterator C<i> to the position C<n> in the string.778 779 =cut780 781 */782 783 static void784 utf16_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL n)785 {786 ASSERT_ARGS(utf16_set_position)787 UChar * const s = (UChar*) i->str->strstart;788 UINTVAL pos;789 pos = 0;790 U16_FWD_N_UNSAFE(s, pos, n);791 i->charpos = n;792 i->bytepos = pos * sizeof (UChar);793 }794 795 #endif796 797 /*798 799 =item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter800 *iter)>801 802 Initializes for string C<src> the string iterator C<iter>.803 804 =cut805 806 */807 808 static void809 iter_init(PARROT_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))810 {811 ASSERT_ARGS(iter_init)812 iter->str = src;813 iter->bytepos = iter->charpos = 0;814 #if PARROT_HAS_ICU815 UNUSED(interp);816 iter->get_and_advance = utf16_decode_and_advance;817 iter->set_and_advance = utf16_encode_and_advance;818 iter->set_position = utf16_set_position;819 #else820 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,821 "no ICU lib loaded");822 #endif823 }824 825 685 /* 826 686 827 687 =item C<void Parrot_encoding_utf16_init(PARROT_INTERP)> … … 849 709 get_bytes, 850 710 codepoints, 851 711 bytes, 852 iter_init,853 712 find_cclass, 854 713 NULL, 855 714 utf16_iter_get, -
src/string/encoding/utf8.c
diff --git a/src/string/encoding/utf8.c b/src/string/encoding/utf8.c index 55e1753..e929397 100644
a b 72 72 __attribute__nonnull__(1) 73 73 __attribute__nonnull__(2); 74 74 75 static void iter_init(SHIM_INTERP,76 ARGIN(const STRING *src),77 ARGOUT(String_iter *iter))78 __attribute__nonnull__(2)79 __attribute__nonnull__(3)80 FUNC_MODIFIES(*iter);81 82 75 static void set_byte(PARROT_INTERP, 83 76 ARGIN(const STRING *src), 84 77 UINTVAL offset, … … 101 94 __attribute__nonnull__(1) 102 95 __attribute__nonnull__(2); 103 96 104 static UINTVAL utf8_decode_and_advance(PARROT_INTERP,105 ARGMOD(String_iter *i))106 __attribute__nonnull__(1)107 __attribute__nonnull__(2)108 FUNC_MODIFIES(*i);109 110 97 PARROT_CANNOT_RETURN_NULL 111 98 static void * utf8_encode(PARROT_INTERP, ARGIN(void *ptr), UINTVAL c) 112 99 __attribute__nonnull__(1) 113 100 __attribute__nonnull__(2); 114 101 115 static void utf8_encode_and_advance(PARROT_INTERP,116 ARGMOD(String_iter *i),117 UINTVAL c)118 __attribute__nonnull__(1)119 __attribute__nonnull__(2)120 FUNC_MODIFIES(*i);121 122 102 static UINTVAL utf8_iter_get(PARROT_INTERP, 123 103 ARGIN(const STRING *str), 124 104 ARGIN(const String_iter *i), … … 161 141 __attribute__nonnull__(3) 162 142 FUNC_MODIFIES(*i); 163 143 164 static void utf8_set_position(SHIM_INTERP,165 ARGMOD(String_iter *i),166 UINTVAL pos)167 __attribute__nonnull__(2)168 FUNC_MODIFIES(*i);169 170 144 PARROT_WARN_UNUSED_RESULT 171 145 PARROT_CANNOT_RETURN_NULL 172 146 static const void * utf8_skip_backward(ARGIN(const void *ptr), UINTVAL n) … … 194 168 #define ASSERT_ARGS_get_codepoints __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 195 169 PARROT_ASSERT_ARG(interp) \ 196 170 , PARROT_ASSERT_ARG(src)) 197 #define ASSERT_ARGS_iter_init __attribute__unused__ int _ASSERT_ARGS_CHECK = (\198 PARROT_ASSERT_ARG(src) \199 , PARROT_ASSERT_ARG(iter))200 171 #define ASSERT_ARGS_set_byte __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 201 172 PARROT_ASSERT_ARG(interp) \ 202 173 , PARROT_ASSERT_ARG(src)) … … 209 180 #define ASSERT_ARGS_utf8_decode __attribute__unused__ int 
_ASSERT_ARGS_CHECK = (\ 210 181 PARROT_ASSERT_ARG(interp) \ 211 182 , PARROT_ASSERT_ARG(ptr)) 212 #define ASSERT_ARGS_utf8_decode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\213 PARROT_ASSERT_ARG(interp) \214 , PARROT_ASSERT_ARG(i))215 183 #define ASSERT_ARGS_utf8_encode __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 216 184 PARROT_ASSERT_ARG(interp) \ 217 185 , PARROT_ASSERT_ARG(ptr)) 218 #define ASSERT_ARGS_utf8_encode_and_advance __attribute__unused__ int _ASSERT_ARGS_CHECK = (\219 PARROT_ASSERT_ARG(interp) \220 , PARROT_ASSERT_ARG(i))221 186 #define ASSERT_ARGS_utf8_iter_get __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 222 187 PARROT_ASSERT_ARG(interp) \ 223 188 , PARROT_ASSERT_ARG(str) \ … … 236 201 #define ASSERT_ARGS_utf8_iter_skip __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 237 202 PARROT_ASSERT_ARG(str) \ 238 203 , PARROT_ASSERT_ARG(i)) 239 #define ASSERT_ARGS_utf8_set_position __attribute__unused__ int _ASSERT_ARGS_CHECK = (\240 PARROT_ASSERT_ARG(i))241 204 #define ASSERT_ARGS_utf8_skip_backward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 242 205 PARROT_ASSERT_ARG(ptr)) 243 206 #define ASSERT_ARGS_utf8_skip_forward __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ … … 634 597 i->bytepos = (const char *)u8ptr - (const char *)str->strstart; 635 598 } 636 599 637 /*638 639 =item C<static UINTVAL utf8_decode_and_advance(PARROT_INTERP, String_iter *i)>640 641 The UTF-8 implementation of the string iterator's C<get_and_advance>642 function.643 644 =cut645 646 */647 648 static UINTVAL649 utf8_decode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i))650 {651 ASSERT_ARGS(utf8_decode_and_advance)652 const utf8_t *u8ptr = (utf8_t *)((char *)i->str->strstart + i->bytepos);653 UINTVAL c = *u8ptr;654 655 if (UTF8_IS_START(c)) {656 UINTVAL len = UTF8SKIP(u8ptr);657 658 c &= UTF8_START_MASK(len);659 i->bytepos += len;660 for (len--; len; len--) {661 u8ptr++;662 663 if (!UTF8_IS_CONTINUATION(*u8ptr))664 
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8,665 "Malformed UTF-8 string\n");666 667 c = UTF8_ACCUMULATE(c, *u8ptr);668 }669 670 if (UNICODE_IS_SURROGATE(c))671 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8,672 "Surrogate in UTF-8 string\n");673 }674 else if (!UNICODE_IS_INVARIANT(c)) {675 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8,676 "Malformed UTF-8 string\n");677 }678 else {679 i->bytepos++;680 }681 682 i->charpos++;683 return c;684 }685 686 /*687 688 =item C<static void utf8_encode_and_advance(PARROT_INTERP, String_iter *i,689 UINTVAL c)>690 691 The UTF-8 implementation of the string iterator's C<set_and_advance>692 function.693 694 =cut695 696 */697 698 static void699 utf8_encode_and_advance(PARROT_INTERP, ARGMOD(String_iter *i), UINTVAL c)700 {701 ASSERT_ARGS(utf8_encode_and_advance)702 const STRING * const s = i->str;703 unsigned char * const pos = (unsigned char *)s->strstart + i->bytepos;704 unsigned char * const new_pos = (unsigned char *)utf8_encode(interp, pos, c);705 706 i->bytepos += (new_pos - pos);707 /* XXX possible buffer overrun exception? 
*/708 PARROT_ASSERT(i->bytepos <= Buffer_buflen(s));709 i->charpos++;710 }711 712 /*713 714 =item C<static void utf8_set_position(PARROT_INTERP, String_iter *i, UINTVAL715 pos)>716 717 The UTF-8 implementation of the string iterator's C<set_position>718 function.719 720 =cut721 722 */723 724 static void725 utf8_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL pos)726 {727 ASSERT_ARGS(utf8_set_position)728 const utf8_t *u8ptr = (const utf8_t *)i->str->strstart;729 730 /* start from last known charpos, if we can */731 if (i->charpos <= pos) {732 const UINTVAL old_pos = pos;733 pos -= i->charpos;734 u8ptr += i->bytepos;735 i->charpos = old_pos;736 }737 else738 i->charpos = pos;739 740 while (pos-- > 0)741 u8ptr += UTF8SKIP(u8ptr);742 743 i->bytepos = (const char *)u8ptr - (const char *)i->str->strstart;744 }745 746 600 747 601 /* 748 602 … … 1026 880 1027 881 /* 1028 882 1029 =item C<static void iter_init(PARROT_INTERP, const STRING *src, String_iter1030 *iter)>1031 1032 Initializes for string C<src> the string iterator C<iter>.1033 1034 =cut1035 1036 */1037 1038 static void1039 iter_init(SHIM_INTERP, ARGIN(const STRING *src), ARGOUT(String_iter *iter))1040 {1041 ASSERT_ARGS(iter_init)1042 iter->str = src;1043 iter->bytepos = 0;1044 iter->charpos = 0;1045 iter->get_and_advance = utf8_decode_and_advance;1046 iter->set_and_advance = utf8_encode_and_advance;1047 iter->set_position = utf8_set_position;1048 }1049 1050 /*1051 1052 883 =item C<void Parrot_encoding_utf8_init(PARROT_INTERP)> 1053 884 1054 885 Initializes the UTF-8 encoding. … … 1074 905 get_bytes, 1075 906 codepoints, 1076 907 bytes, 1077 iter_init,1078 908 find_cclass, 1079 909 NULL, 1080 910 utf8_iter_get,