summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--array.c68
-rw-r--r--benchmark/array_join.yml34
-rw-r--r--benchmark/array_join_regression.yml44
-rw-r--r--depend2
-rw-r--r--test/ruby/test_array.rb54
5 files changed, 201 insertions, 1 deletions
diff --git a/array.c b/array.c
index 42aba64abe..02032668f3 100644
--- a/array.c
+++ b/array.c
@@ -23,6 +23,7 @@
#include "internal/object.h"
#include "internal/proc.h"
#include "internal/rational.h"
+#include "internal/string.h"
#include "internal/vm.h"
#include "probes.h"
#include "ruby/encoding.h"
@@ -2993,6 +2994,67 @@ ary_join_1(VALUE obj, VALUE ary, VALUE sep, long i, VALUE result, int *first)
}
}
+/* Fast path for Array#join: when every element is a String in one fast-path encoding
+ * (UTF-8 / US-ASCII / ASCII-8BIT) and the separator is byte-compatible, the result can
+ * be produced with a single memcpy pass instead of appending each element through
+ * rb_str_buf_append. Returns the joined String, or Qundef when any of those invariants
+ * does not hold -- the caller then uses the general path. No user code runs here, so
+ * the array cannot be mutated underneath us. */
+static VALUE
+ary_join_fast(VALUE ary, VALUE sep)
+{
+ long n = RARRAY_LEN(ary);
+ if (n == 0) return Qundef;
+
+ VALUE first = RARRAY_AREF(ary, 0);
+ if (!RB_TYPE_P(first, T_STRING)) return Qundef;
+ int encidx = ENCODING_GET(first);
+ if (!rb_str_encindex_fastpath(encidx)) return Qundef;
+
+ /* cr accumulates the result code range exactly as rb_str_buf_append would. */
+ enum ruby_coderange_type cr = ENC_CODERANGE_7BIT;
+ long sep_len = 0;
+ const char *sep_ptr = NULL;
+ if (!NIL_P(sep)) {
+ int sep_cr = rb_enc_str_coderange(sep);
+ /* The separator must share the element encoding, or be 7-bit (encidx is
+ ASCII-compatible, so a 7-bit separator concatenates without negotiation). */
+ if (ENCODING_GET(sep) != encidx && sep_cr != ENC_CODERANGE_7BIT) return Qundef;
+ sep_ptr = RSTRING_PTR(sep);
+ sep_len = RSTRING_LEN(sep);
+ if (n > 1) cr = ENC_CODERANGE_AND(cr, sep_cr);
+ }
+
+ /* One pass: confirm the shared encoding, measure the length, merge code ranges. */
+ long len = 1 + sep_len * (n - 1);
+ for (long i = 0; i < n; i++) {
+ VALUE s = RARRAY_AREF(ary, i);
+ if (!RB_TYPE_P(s, T_STRING) || ENCODING_GET(s) != encidx) return Qundef;
+ len += RSTRING_LEN(s);
+ cr = ENC_CODERANGE_AND(cr, rb_enc_str_coderange(s));
+ }
+
+ VALUE result = rb_str_buf_new(len);
+ rb_enc_associate_index(result, encidx);
+ char *const buf = RSTRING_PTR(result);
+ char *p = buf;
+ for (long i = 0; i < n; i++) {
+ VALUE s = RARRAY_AREF(ary, i);
+ long slen = RSTRING_LEN(s);
+ if (i > 0 && sep_len) {
+ memcpy(p, sep_ptr, sep_len);
+ p += sep_len;
+ }
+ memcpy(p, RSTRING_PTR(s), slen);
+ p += slen;
+ }
+
+ ENC_CODERANGE_CLEAR(result); /* keep rb_str_set_len from rescanning the bytes */
+ rb_str_set_len(result, p - buf);
+ ENC_CODERANGE_SET(result, cr);
+ return result;
+}
+
VALUE
rb_ary_join(VALUE ary, VALUE sep)
{
@@ -3001,8 +3063,12 @@ rb_ary_join(VALUE ary, VALUE sep)
if (RARRAY_LEN(ary) == 0) return rb_usascii_str_new(0, 0);
+ if (!NIL_P(sep)) StringValue(sep);
+
+ result = ary_join_fast(ary, sep);
+ if (!UNDEF_P(result)) return result;
+
if (!NIL_P(sep)) {
- StringValue(sep);
len += RSTRING_LEN(sep) * (RARRAY_LEN(ary) - 1);
}
long len_memo = RARRAY_LEN(ary);
diff --git a/benchmark/array_join.yml b/benchmark/array_join.yml
new file mode 100644
index 0000000000..65d718e9af
--- /dev/null
+++ b/benchmark/array_join.yml
@@ -0,0 +1,34 @@
+prelude: |
+ # All elements are 7-bit ASCII Strings (the dominant real-world join case).
+ # Distinct objects so large-N cases exercise realistic heap/cache behavior.
+ a100_1 = Array.new(100) { "a" * 1 }
+ a100_8 = Array.new(100) { "a" * 8 }
+ a100_40 = Array.new(100) { "a" * 40 }
+ a1k_1 = Array.new(1000) { "a" * 1 }
+ a1k_8 = Array.new(1000) { "a" * 8 }
+ a1k_40 = Array.new(1000) { "a" * 40 }
+ a100k_1 = Array.new(100000) { "a" * 1 }
+ a100k_8 = Array.new(100000) { "a" * 8 }
+ a100k_40 = Array.new(100000) { "a" * 40 }
+
+benchmark:
+ # separator " " (space)
+ join_sp_n100_e1: a100_1.join(" ")
+ join_sp_n100_e8: a100_8.join(" ")
+ join_sp_n100_e40: a100_40.join(" ")
+ join_sp_n1k_e1: a1k_1.join(" ")
+ join_sp_n1k_e8: a1k_8.join(" ")
+ join_sp_n1k_e40: a1k_40.join(" ")
+ join_sp_n100k_e1: a100k_1.join(" ")
+ join_sp_n100k_e8: a100k_8.join(" ")
+ join_sp_n100k_e40: a100k_40.join(" ")
+ # separator "\n" (newline)
+ join_nl_n100_e1: a100_1.join("\n")
+ join_nl_n100_e8: a100_8.join("\n")
+ join_nl_n100_e40: a100_40.join("\n")
+ join_nl_n1k_e1: a1k_1.join("\n")
+ join_nl_n1k_e8: a1k_8.join("\n")
+ join_nl_n1k_e40: a1k_40.join("\n")
+ join_nl_n100k_e1: a100k_1.join("\n")
+ join_nl_n100k_e8: a100k_8.join("\n")
+ join_nl_n100k_e40: a100k_40.join("\n")
diff --git a/benchmark/array_join_regression.yml b/benchmark/array_join_regression.yml
new file mode 100644
index 0000000000..5cbfa6867f
--- /dev/null
+++ b/benchmark/array_join_regression.yml
@@ -0,0 +1,44 @@
+prelude: |
+ # --- must NOT take any fast path: prove added precompute-loop checks don't regress ---
+ # all multibyte UTF-8 (VALID coderange). 7-bit gate bails at elem 0; same-encoding gate TAKES it.
+ mb_utf8 = Array.new(1000) { "café" }
+ # ASCII except a trailing multibyte elem: WORST CASE for 7-bit gate (scans all, then falls back).
+ tail_mb = Array.new(1000) { "a" * 8 }; tail_mb[-1] = "café"
+ # non-String elements -> ary_join_1 (to_s). Gate bails at type check before coderange.
+ nonstr_int = Array.new(1000) { |i| i }
+ # ASCII strings except a trailing Integer: precompute scans strings, then mixed fallback.
+ mixed_tail = (Array.new(999) { "a" * 8 } << 42)
+ # UTF-16LE: not ASCII-compatible. Both gates bail immediately.
+ utf16 = Array.new(1000) { "ab".encode("UTF-16LE") }
+ # nested arrays -> recursive fallback.
+ nested = Array.new(500) { [1, 2] }
+
+ # 7-bit ASCII elements but a multibyte separator (3-byte em dash). Both gates bail at sep check.
+ ascii_elems = Array.new(1000) { "a" * 8 }
+ sep_mb = "—"
+
+ # --- fast-path ELIGIBLE variants: confirm benefit generalizes beyond single chars / find crossover ---
+ # frozen ASCII elements (read-only path).
+ frozen_elems = Array.new(1000) { ("a" * 8).freeze }
+ # large elements: where memcpy should dominate and the win decays toward 1x.
+ big_e256 = Array.new(1000) { "a" * 256 }
+ big_e1k = Array.new(1000) { "a" * 1024 }
+ big_e4k = Array.new(1000) { "a" * 4096 }
+
+benchmark:
+ # regression (fallback / gate worst-case) — all use a 1-byte ASCII separator unless noted
+ reg_multibyte_utf8: mb_utf8.join(" ")
+ reg_tail_multibyte: tail_mb.join(" ")
+ reg_nonstring_int: nonstr_int.join(" ")
+ reg_mixed_tail_int: mixed_tail.join(" ")
+ reg_utf16: utf16.join(" ".encode("UTF-16LE"))
+ reg_nested: nested.join(" ")
+ reg_multibyte_sep: ascii_elems.join(sep_mb)
+
+ # fast-path eligible variants
+ fp_frozen: frozen_elems.join(" ")
+ fp_nosep: ascii_elems.join
+ fp_multichar_sep: ascii_elems.join(", ")
+ fp_big_e256: big_e256.join(" ")
+ fp_big_e1k: big_e1k.join(" ")
+ fp_big_e4k: big_e4k.join(" ")
diff --git a/depend b/depend
index a2e8312298..7ae13bd63d 100644
--- a/depend
+++ b/depend
@@ -80,6 +80,7 @@ array.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h
array.$(OBJEXT): $(top_srcdir)/internal/serial.h
array.$(OBJEXT): $(top_srcdir)/internal/set_table.h
array.$(OBJEXT): $(top_srcdir)/internal/static_assert.h
+array.$(OBJEXT): $(top_srcdir)/internal/string.h
array.$(OBJEXT): $(top_srcdir)/internal/struct.h
array.$(OBJEXT): $(top_srcdir)/internal/variable.h
array.$(OBJEXT): $(top_srcdir)/internal/vm.h
@@ -102,6 +103,7 @@ array.$(OBJEXT): {$(VPATH)}config.h
array.$(OBJEXT): {$(VPATH)}constant.h
array.$(OBJEXT): {$(VPATH)}debug_counter.h
array.$(OBJEXT): {$(VPATH)}defines.h
+array.$(OBJEXT): {$(VPATH)}encindex.h
array.$(OBJEXT): {$(VPATH)}encoding.h
array.$(OBJEXT): {$(VPATH)}id.h
array.$(OBJEXT): {$(VPATH)}id_table.h
diff --git a/test/ruby/test_array.rb b/test/ruby/test_array.rb
index 76455187a5..36f8801614 100644
--- a/test/ruby/test_array.rb
+++ b/test/ruby/test_array.rb
@@ -2812,6 +2812,60 @@ class TestArray < Test::Unit::TestCase
assert_equal("b", x.ary.join(""))
end
+ def test_join_encoding
+ # Multibyte UTF-8 elements: result is UTF-8, valid, not ASCII-only.
+ r = @cls["caf\u00e9", "na\u00efve"].join(" ")
+ assert_equal("caf\u00e9 na\u00efve", r)
+ assert_equal(Encoding::UTF_8, r.encoding)
+ assert_equal(true, r.valid_encoding?)
+ assert_equal(false, r.ascii_only?)
+
+ # All 7-bit content stays ASCII-only.
+ r = @cls["abc", "def"].join(",")
+ assert_equal("abc,def", r)
+ assert_equal(true, r.ascii_only?)
+
+ # Multibyte separator (same encoding as the elements).
+ r = @cls["a", "b", "c"].join("\u2014")
+ assert_equal("a\u2014b\u2014c", r)
+ assert_equal(Encoding::UTF_8, r.encoding)
+ assert_equal(false, r.ascii_only?)
+ assert_equal(true, r.valid_encoding?)
+
+ # Mixed ASCII-compatible encodings, all 7-bit: result takes element 0's encoding.
+ r = @cls["abc".dup.force_encoding("US-ASCII"), "def".dup.force_encoding("UTF-8")].join(" ")
+ assert_equal("abc def", r)
+ assert_equal(Encoding::US_ASCII, r.encoding)
+ assert_equal(true, r.ascii_only?)
+
+ # 7-bit content in a non-ASCII encoding (Shift_JIS).
+ r = @cls["abc".encode("Shift_JIS"), "def".encode("Shift_JIS")].join(" ")
+ assert_equal(Encoding::Shift_JIS, r.encoding)
+ assert_equal("abc def".b, r.b)
+
+ # Same-encoding multibyte (Shift_JIS): byte-for-byte concatenation.
+ s = "\u65e5\u672c".encode("Shift_JIS")
+ sep = "/".encode("Shift_JIS")
+ r = @cls[s, s].join(sep)
+ assert_equal(Encoding::Shift_JIS, r.encoding)
+ assert_equal((s + sep + s).b, r.b)
+
+ # Broken bytes: result keeps them and reports invalid.
+ bad = "\xff\xfe".dup.force_encoding("UTF-8")
+ r = @cls[bad, bad].join(" ")
+ assert_equal((bad + " " + bad).b, r.b)
+ assert_equal(false, r.valid_encoding?)
+
+ # Incompatible element/separator encodings still raise.
+ assert_raise(Encoding::CompatibilityError) do
+ @cls["a".dup.force_encoding("UTF-8"), "b".encode("UTF-16LE")].join(" ")
+ end
+
+ # Bulk copy: large array, verified against a non-join oracle.
+ big = @cls.new(500) { "ab" }
+ assert_equal(("ab," * 500).chomp(","), big.join(","))
+ end
+
def test_to_a2
klass = Class.new(Array)
a = klass.new.to_a