diff options
| -rw-r--r-- | array.c | 68 | ||||
| -rw-r--r-- | benchmark/array_join.yml | 34 | ||||
| -rw-r--r-- | benchmark/array_join_regression.yml | 44 | ||||
| -rw-r--r-- | depend | 2 | ||||
| -rw-r--r-- | test/ruby/test_array.rb | 54 |
5 files changed, 201 insertions, 1 deletions
@@ -23,6 +23,7 @@ #include "internal/object.h" #include "internal/proc.h" #include "internal/rational.h" +#include "internal/string.h" #include "internal/vm.h" #include "probes.h" #include "ruby/encoding.h" @@ -2993,6 +2994,67 @@ ary_join_1(VALUE obj, VALUE ary, VALUE sep, long i, VALUE result, int *first) } } +/* Fast path for Array#join: when every element is a String in one fast-path encoding + * (UTF-8 / US-ASCII / ASCII-8BIT) and the separator is byte-compatible, the result can + * be produced with a single memcpy pass instead of appending each element through + * rb_str_buf_append. Returns the joined String, or Qundef when any of those invariants + * does not hold -- the caller then uses the general path. No user code runs here, so + * the array cannot be mutated underneath us. */ +static VALUE +ary_join_fast(VALUE ary, VALUE sep) +{ + long n = RARRAY_LEN(ary); + if (n == 0) return Qundef; + + VALUE first = RARRAY_AREF(ary, 0); + if (!RB_TYPE_P(first, T_STRING)) return Qundef; + int encidx = ENCODING_GET(first); + if (!rb_str_encindex_fastpath(encidx)) return Qundef; + + /* cr accumulates the result code range exactly as rb_str_buf_append would. */ + enum ruby_coderange_type cr = ENC_CODERANGE_7BIT; + long sep_len = 0; + const char *sep_ptr = NULL; + if (!NIL_P(sep)) { + int sep_cr = rb_enc_str_coderange(sep); + /* The separator must share the element encoding, or be 7-bit (encidx is + ASCII-compatible, so a 7-bit separator concatenates without negotiation). */ + if (ENCODING_GET(sep) != encidx && sep_cr != ENC_CODERANGE_7BIT) return Qundef; + sep_ptr = RSTRING_PTR(sep); + sep_len = RSTRING_LEN(sep); + if (n > 1) cr = ENC_CODERANGE_AND(cr, sep_cr); + } + + /* One pass: confirm the shared encoding, measure the length, merge code ranges. */ + long len = 1 + sep_len * (n - 1); + for (long i = 0; i < n; i++) { + VALUE s = RARRAY_AREF(ary, i); + if (!RB_TYPE_P(s, T_STRING) || ENCODING_GET(s) != encidx) return Qundef; + len += RSTRING_LEN(s); + cr = ENC_CODERANGE_AND(cr, rb_enc_str_coderange(s)); + } + + VALUE result = rb_str_buf_new(len); + rb_enc_associate_index(result, encidx); + char *const buf = RSTRING_PTR(result); + char *p = buf; + for (long i = 0; i < n; i++) { + VALUE s = RARRAY_AREF(ary, i); + long slen = RSTRING_LEN(s); + if (i > 0 && sep_len) { + memcpy(p, sep_ptr, sep_len); + p += sep_len; + } + memcpy(p, RSTRING_PTR(s), slen); + p += slen; + } + + ENC_CODERANGE_CLEAR(result); /* keep rb_str_set_len from rescanning the bytes */ + rb_str_set_len(result, p - buf); + ENC_CODERANGE_SET(result, cr); + return result; +} + VALUE rb_ary_join(VALUE ary, VALUE sep) { @@ -3001,8 +3063,12 @@ rb_ary_join(VALUE ary, VALUE sep) if (RARRAY_LEN(ary) == 0) return rb_usascii_str_new(0, 0); + if (!NIL_P(sep)) StringValue(sep); + + result = ary_join_fast(ary, sep); + if (!UNDEF_P(result)) return result; + if (!NIL_P(sep)) { - StringValue(sep); len += RSTRING_LEN(sep) * (RARRAY_LEN(ary) - 1); } long len_memo = RARRAY_LEN(ary); diff --git a/benchmark/array_join.yml b/benchmark/array_join.yml new file mode 100644 index 0000000000..65d718e9af --- /dev/null +++ b/benchmark/array_join.yml @@ -0,0 +1,34 @@ +prelude: | + # All elements are 7-bit ASCII Strings (the dominant real-world join case). + # Distinct objects so large-N cases exercise realistic heap/cache behavior. + a100_1 = Array.new(100) { "a" * 1 } + a100_8 = Array.new(100) { "a" * 8 } + a100_40 = Array.new(100) { "a" * 40 } + a1k_1 = Array.new(1000) { "a" * 1 } + a1k_8 = Array.new(1000) { "a" * 8 } + a1k_40 = Array.new(1000) { "a" * 40 } + a100k_1 = Array.new(100000) { "a" * 1 } + a100k_8 = Array.new(100000) { "a" * 8 } + a100k_40 = Array.new(100000) { "a" * 40 } + +benchmark: + # separator " " (space) + join_sp_n100_e1: a100_1.join(" ") + join_sp_n100_e8: a100_8.join(" ") + join_sp_n100_e40: a100_40.join(" ") + join_sp_n1k_e1: a1k_1.join(" ") + join_sp_n1k_e8: a1k_8.join(" ") + join_sp_n1k_e40: a1k_40.join(" ") + join_sp_n100k_e1: a100k_1.join(" ") + join_sp_n100k_e8: a100k_8.join(" ") + join_sp_n100k_e40: a100k_40.join(" ") + # separator "\n" (newline) + join_nl_n100_e1: a100_1.join("\n") + join_nl_n100_e8: a100_8.join("\n") + join_nl_n100_e40: a100_40.join("\n") + join_nl_n1k_e1: a1k_1.join("\n") + join_nl_n1k_e8: a1k_8.join("\n") + join_nl_n1k_e40: a1k_40.join("\n") + join_nl_n100k_e1: a100k_1.join("\n") + join_nl_n100k_e8: a100k_8.join("\n") + join_nl_n100k_e40: a100k_40.join("\n") diff --git a/benchmark/array_join_regression.yml b/benchmark/array_join_regression.yml new file mode 100644 index 0000000000..5cbfa6867f --- /dev/null +++ b/benchmark/array_join_regression.yml @@ -0,0 +1,44 @@ +prelude: | + # --- must NOT take any fast path: prove added precompute-loop checks don't regress --- + # all multibyte UTF-8 (VALID coderange). 7-bit gate bails at elem 0; same-encoding gate TAKES it. + mb_utf8 = Array.new(1000) { "café" } + # ASCII except a trailing multibyte elem: WORST CASE for 7-bit gate (scans all, then falls back). + tail_mb = Array.new(1000) { "a" * 8 }; tail_mb[-1] = "café" + # non-String elements -> ary_join_1 (to_s). Gate bails at type check before coderange. + nonstr_int = Array.new(1000) { |i| i } + # ASCII strings except a trailing Integer: precompute scans strings, then mixed fallback. + mixed_tail = (Array.new(999) { "a" * 8 } << 42) + # UTF-16LE: not ASCII-compatible. Both gates bail immediately. + utf16 = Array.new(1000) { "ab".encode("UTF-16LE") } + # nested arrays -> recursive fallback. + nested = Array.new(500) { [1, 2] } + + # 7-bit ASCII elements but a multibyte separator (3-byte em dash). Both gates bail at sep check. + ascii_elems = Array.new(1000) { "a" * 8 } + sep_mb = "—" + + # --- fast-path ELIGIBLE variants: confirm benefit generalizes beyond single chars / find crossover --- + # frozen ASCII elements (read-only path). + frozen_elems = Array.new(1000) { ("a" * 8).freeze } + # large elements: where memcpy should dominate and the win decays toward 1x. + big_e256 = Array.new(1000) { "a" * 256 } + big_e1k = Array.new(1000) { "a" * 1024 } + big_e4k = Array.new(1000) { "a" * 4096 } + +benchmark: + # regression (fallback / gate worst-case) — all use a 1-byte ASCII separator unless noted + reg_multibyte_utf8: mb_utf8.join(" ") + reg_tail_multibyte: tail_mb.join(" ") + reg_nonstring_int: nonstr_int.join(" ") + reg_mixed_tail_int: mixed_tail.join(" ") + reg_utf16: utf16.join(" ".encode("UTF-16LE")) + reg_nested: nested.join(" ") + reg_multibyte_sep: ascii_elems.join(sep_mb) + + # fast-path eligible variants + fp_frozen: frozen_elems.join(" ") + fp_nosep: ascii_elems.join + fp_multichar_sep: ascii_elems.join(", ") + fp_big_e256: big_e256.join(" ") + fp_big_e1k: big_e1k.join(" ") + fp_big_e4k: big_e4k.join(" ") @@ -80,6 +80,7 @@ array.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h array.$(OBJEXT): $(top_srcdir)/internal/serial.h array.$(OBJEXT): $(top_srcdir)/internal/set_table.h array.$(OBJEXT): $(top_srcdir)/internal/static_assert.h +array.$(OBJEXT): $(top_srcdir)/internal/string.h array.$(OBJEXT): $(top_srcdir)/internal/struct.h array.$(OBJEXT): $(top_srcdir)/internal/variable.h array.$(OBJEXT): $(top_srcdir)/internal/vm.h @@ -102,6 +103,7 @@ array.$(OBJEXT): {$(VPATH)}config.h array.$(OBJEXT): {$(VPATH)}constant.h array.$(OBJEXT): {$(VPATH)}debug_counter.h array.$(OBJEXT): {$(VPATH)}defines.h +array.$(OBJEXT): {$(VPATH)}encindex.h array.$(OBJEXT): {$(VPATH)}encoding.h array.$(OBJEXT): {$(VPATH)}id.h array.$(OBJEXT): {$(VPATH)}id_table.h diff --git a/test/ruby/test_array.rb b/test/ruby/test_array.rb index 76455187a5..36f8801614 100644 --- a/test/ruby/test_array.rb +++ b/test/ruby/test_array.rb @@ -2812,6 +2812,60 @@ class TestArray < Test::Unit::TestCase assert_equal("b", x.ary.join("")) end + def test_join_encoding + # Multibyte UTF-8 elements: result is UTF-8, valid, not ASCII-only. + r = @cls["caf\u00e9", "na\u00efve"].join(" ") + assert_equal("caf\u00e9 na\u00efve", r) + assert_equal(Encoding::UTF_8, r.encoding) + assert_equal(true, r.valid_encoding?) + assert_equal(false, r.ascii_only?) + + # All 7-bit content stays ASCII-only. + r = @cls["abc", "def"].join(",") + assert_equal("abc,def", r) + assert_equal(true, r.ascii_only?) + + # Multibyte separator (same encoding as the elements). + r = @cls["a", "b", "c"].join("\u2014") + assert_equal("a\u2014b\u2014c", r) + assert_equal(Encoding::UTF_8, r.encoding) + assert_equal(false, r.ascii_only?) + assert_equal(true, r.valid_encoding?) + + # Mixed ASCII-compatible encodings, all 7-bit: result takes element 0's encoding. + r = @cls["abc".dup.force_encoding("US-ASCII"), "def".dup.force_encoding("UTF-8")].join(" ") + assert_equal("abc def", r) + assert_equal(Encoding::US_ASCII, r.encoding) + assert_equal(true, r.ascii_only?) + + # 7-bit content in a non-ASCII encoding (Shift_JIS). + r = @cls["abc".encode("Shift_JIS"), "def".encode("Shift_JIS")].join(" ") + assert_equal(Encoding::Shift_JIS, r.encoding) + assert_equal("abc def".b, r.b) + + # Same-encoding multibyte (Shift_JIS): byte-for-byte concatenation. + s = "\u65e5\u672c".encode("Shift_JIS") + sep = "/".encode("Shift_JIS") + r = @cls[s, s].join(sep) + assert_equal(Encoding::Shift_JIS, r.encoding) + assert_equal((s + sep + s).b, r.b) + + # Broken bytes: result keeps them and reports invalid. + bad = "\xff\xfe".dup.force_encoding("UTF-8") + r = @cls[bad, bad].join(" ") + assert_equal((bad + " " + bad).b, r.b) + assert_equal(false, r.valid_encoding?) + + # Incompatible element/separator encodings still raise. + assert_raise(Encoding::CompatibilityError) do + @cls["a".dup.force_encoding("UTF-8"), "b".encode("UTF-16LE")].join(" ") + end + + # Bulk copy: large array, verified against a non-join oracle. + big = @cls.new(500) { "ab" } + assert_equal(("ab," * 500).chomp(","), big.join(",")) + end + def test_to_a2 klass = Class.new(Array) a = klass.new.to_a |
