Index: src/string/api.c
===================================================================
--- src/string/api.c	(revision 39517)
+++ src/string/api.c	(working copy)
@@ -410,6 +410,53 @@
 
 */
 
+/*
+ * Helper for destination(): decides whether the charset/encoding pair of
+ * string a can serve as the common representation for a and b.
+ *
+ * a's pair is chosen when both strings already share charset and encoding,
+ * or when a is utf8-encoded and b is fixed_8-encoded.  In that case *c and
+ * *e receive a's charset and encoding and 1 is returned.  Otherwise 0 is
+ * returned and *c and *e are left untouched.
+ */
+static int
+destination_aux(ARGIN(const STRING *a), ARGIN(const STRING *b),
+        ARGOUT(const CHARSET **c), ARGOUT(const ENCODING **e))
+{
+    const int same_representation =
+        a->encoding == b->encoding && a->charset == b->charset;
+    const int utf8_with_fixed_8 =
+        a->encoding == Parrot_utf8_encoding_ptr &&
+        b->encoding == Parrot_fixed_8_encoding_ptr;
+
+    if (same_representation || utf8_with_fixed_8) {
+        *c = a->charset;
+        *e = a->encoding;
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Chooses the charset and encoding that both a and b are converted to
+ * before they are combined, storing them in *c and *e.
+ *
+ * The representation of either string is preferred when destination_aux()
+ * accepts it (a is tried first, then b); otherwise the result falls back to
+ * the unicode charset with utf16 encoding.
+ */
+static void
+destination(ARGIN(const STRING *a), ARGIN(const STRING *b),
+        ARGOUT(const CHARSET **c), ARGOUT(const ENCODING **e))
+{
+    if (destination_aux(a, b, c, e) || destination_aux(b, a, c, e))
+        return;
+
+    /* neither representation can be reused */
+    *c = Parrot_unicode_charset_ptr;
+    *e = Parrot_utf16_encoding_ptr;
+}
+
 PARROT_EXPORT
 PARROT_WARN_UNUSED_RESULT
 PARROT_CAN_RETURN_NULL
@@ -425,18 +472,14 @@
 
     /* a table could possibly simplify the logic */
     if (a->encoding == Parrot_utf8_encoding_ptr &&
-            (b->charset == Parrot_ascii_charset_ptr ||
-             b->charset == Parrot_iso_8859_1_charset_ptr)) {
-        if (a->strlen == a->bufused) {
-            *e = Parrot_fixed_8_encoding_ptr;
-            return Parrot_ascii_charset_ptr;
-        }
+            (b->charset == Parrot_ascii_charset_ptr /*||
+             b->charset == Parrot_iso_8859_1_charset_ptr*/)) {
         *e = a->encoding;
         return a->charset;
     }
     if (b->encoding == Parrot_utf8_encoding_ptr &&
-            (a->charset == Parrot_ascii_charset_ptr ||
-             a->charset == Parrot_iso_8859_1_charset_ptr)) {
+            (a->charset == Parrot_ascii_charset_ptr /*||
+             a->charset == Parrot_iso_8859_1_charset_ptr*/)) {
         if (b->strlen == b->bufused) {
             *e = Parrot_fixed_8_encoding_ptr;
             return a->charset;
@@ -561,14 +604,21 @@
         a->encoding = enc;
     }
     else {
-        /* upgrade to utf16 */
-        Parrot_utf16_encoding_ptr->to_encoding(interp, a, NULL);
-        b = Parrot_utf16_encoding_ptr->to_encoding(interp, b,
-                Parrot_gc_new_string_header(interp, 0));
-
-        /* result could be mixed ucs2 / utf16 */
-        if (b->encoding == Parrot_utf16_encoding_ptr)
-            a->encoding = Parrot_utf16_encoding_ptr;
+        /* upgrade both strings to a common destination */
+        destination(a, b, &cs, &enc);
+        if (cs != a->charset)
+            a = cs->to_charset(interp, a, NULL);
+        if (enc != a->encoding)
+            a = enc->to_encoding(interp, a, NULL);
+
+        /* b is the caller's string: convert it into a fresh header rather
+         * than in place, so that the argument itself is not modified */
+        if (cs != b->charset)
+            b = cs->to_charset(interp, b,
+                    Parrot_gc_new_string_header(interp, 0));
+        if (enc != b->encoding)
+            b = enc->to_encoding(interp, b,
+                    Parrot_gc_new_string_header(interp, 0));
     }
 
     /* calc usable and total bytes */