summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: NARUSE, Yui <nurse@users.noreply.github.com> 2024-03-20 22:40:50 +0900
committer: GitHub <noreply@github.com> 2024-03-20 13:40:50 +0000
commit: c626c201e4129bbea17583ecef73472c6f668c81 (patch)
tree: 04db541d0340a11e8ad842ee63adf30094ca1463
parent: ce372be903e5f3597f1dc83cb558f165850b3bee (diff)
merge revision(s) 01bfd1a2bf013a9ed92a9722ac5228187e05e6a8,1c120efe02d079b0a1dea573cf0fd7978d9cc857,31378dc0969f4466b2122d730b7298dd7004acdf: [Backport #20228] (#10301)
Fix memory leak in OnigRegion when match raises

[Bug #20228]

rb_reg_onig_match can raise a Regexp::TimeoutError, which would cause the OnigRegion to leak.

Fix memory leak in stk_base when Regexp timeout

[Bug #20228]

If rb_reg_check_timeout raises a Regexp::TimeoutError, then the stk_base will leak.

Add memory leak test for Regexp timeout

[Bug #20228]
-rw-r--r-- re.c | 71
-rw-r--r-- regexec.c | 7
-rw-r--r-- regint.h | 20
-rw-r--r-- test/ruby/test_regexp.rb | 17
4 files changed, 92 insertions, 23 deletions
diff --git a/re.c b/re.c
index bf4dc5ccbf..a19dcb920d 100644
--- a/re.c
+++ b/re.c
@@ -88,6 +88,9 @@ static const char casetable[] = {
# error >>> "You lose. You will need a translation table for your character set." <<<
#endif
+// The process-global timeout for regexp matching
+rb_hrtime_t rb_reg_match_time_limit = 0;
+
int
rb_memcicmp(const void *x, const void *y, long len)
{
@@ -1732,6 +1735,23 @@ reg_onig_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_p
ONIG_OPTION_NONE);
}
+struct rb_reg_onig_match_args {
+ VALUE re;
+ VALUE str;
+ struct reg_onig_search_args args;
+ struct re_registers regs;
+
+ OnigPosition result;
+};
+
+static VALUE
+rb_reg_onig_match_try(VALUE value_args)
+{
+ struct rb_reg_onig_match_args *args = (struct rb_reg_onig_match_args *)value_args;
+ args->result = rb_reg_onig_match(args->re, args->str, reg_onig_search, &args->args, &args->regs);
+ return Qnil;
+}
+
/* returns byte offset */
static long
rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match)
@@ -1742,22 +1762,38 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
return -1;
}
- struct reg_onig_search_args args = {
- .pos = pos,
- .range = reverse ? 0 : len,
+ struct rb_reg_onig_match_args args = {
+ .re = re,
+ .str = str,
+ .args = {
+ .pos = pos,
+ .range = reverse ? 0 : len,
+ },
+ .regs = {0}
};
- struct re_registers regs = {0};
+ /* If there is a timeout set, then rb_reg_onig_match could raise a
+ * Regexp::TimeoutError so we want to protect it from leaking memory. */
+ if (rb_reg_match_time_limit) {
+ int state;
+ rb_protect(rb_reg_onig_match_try, (VALUE)&args, &state);
+ if (state) {
+ onig_region_free(&args.regs, false);
+ rb_jump_tag(state);
+ }
+ }
+ else {
+ rb_reg_onig_match_try((VALUE)&args);
+ }
- OnigPosition result = rb_reg_onig_match(re, str, reg_onig_search, &args, &regs);
- if (result == ONIG_MISMATCH) {
+ if (args.result == ONIG_MISMATCH) {
rb_backref_set(Qnil);
return ONIG_MISMATCH;
}
VALUE match = match_alloc(rb_cMatch);
rb_matchext_t *rm = RMATCH_EXT(match);
- rm->regs = regs;
+ rm->regs = args.regs;
if (set_backref_str) {
RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str));
@@ -1774,7 +1810,7 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back
rb_backref_set(match);
if (set_match) *set_match = match;
- return result;
+ return args.result;
}
long
@@ -4601,12 +4637,9 @@ re_warn(const char *s)
rb_warn("%s", s);
}
-// The process-global timeout for regexp matching
-rb_hrtime_t rb_reg_match_time_limit = 0;
-
// This function is periodically called during regexp matching
-void
-rb_reg_check_timeout(regex_t *reg, void *end_time_)
+bool
+rb_reg_timeout_p(regex_t *reg, void *end_time_)
{
rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_;
@@ -4631,10 +4664,18 @@ rb_reg_check_timeout(regex_t *reg, void *end_time_)
}
else {
if (*end_time < rb_hrtime_now()) {
- // timeout is exceeded
- rb_raise(rb_eRegexpTimeoutError, "regexp match timeout");
+ // Timeout has exceeded
+ return true;
}
}
+
+ return false;
+}
+
+void
+rb_reg_raise_timeout(void)
+{
+ rb_raise(rb_eRegexpTimeoutError, "regexp match timeout");
}
/*
diff --git a/regexec.c b/regexec.c
index 9ba4106276..fb82d32531 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2240,7 +2240,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
UChar *pkeep;
char *alloca_base;
char *xmalloc_base = NULL;
- OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
+ OnigStackType *stk_alloc, *stk_base = NULL, *stk, *stk_end;
OnigStackType *stkp; /* used as any purpose. */
OnigStackIndex si;
OnigStackIndex *repeat_stk;
@@ -4142,6 +4142,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_SAVE;
xfree(xmalloc_base);
return ONIGERR_UNEXPECTED_BYTECODE;
+
+ timeout:
+ xfree(xmalloc_base);
+ xfree(stk_base);
+ HANDLE_REG_TIMEOUT_IN_MATCH_AT;
}
diff --git a/regint.h b/regint.h
index 034a31426c..57cbb81654 100644
--- a/regint.h
+++ b/regint.h
@@ -154,13 +154,18 @@
#ifdef RUBY
# define CHECK_INTERRUPT_IN_MATCH_AT do { \
- msa->counter++; \
- if (msa->counter >= 128) { \
- msa->counter = 0; \
- rb_reg_check_timeout(reg, &msa->end_time); \
- rb_thread_check_ints(); \
- } \
+ msa->counter++; \
+ if (msa->counter >= 128) { \
+ msa->counter = 0; \
+ if (rb_reg_timeout_p(reg, &msa->end_time)) { \
+ goto timeout; \
+ } \
+ rb_thread_check_ints(); \
+ } \
} while(0)
+# define HANDLE_REG_TIMEOUT_IN_MATCH_AT do { \
+ rb_reg_raise_timeout(); \
+} while (0)
# define onig_st_init_table st_init_table
# define onig_st_init_table_with_size st_init_table_with_size
# define onig_st_init_numtable st_init_numtable
@@ -996,7 +1001,8 @@ extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, c
#ifdef RUBY
extern size_t onig_memsize(const regex_t *reg);
extern size_t onig_region_memsize(const struct re_registers *regs);
-void rb_reg_check_timeout(regex_t *reg, void *end_time);
+bool rb_reg_timeout_p(regex_t *reg, void *end_time);
+NORETURN(void rb_reg_raise_timeout(void));
#endif
RUBY_SYMBOL_EXPORT_END
diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb
index 0d6ab4682d..4e04ccee69 100644
--- a/test/ruby/test_regexp.rb
+++ b/test/ruby/test_regexp.rb
@@ -1807,6 +1807,23 @@ class TestRegexp < Test::Unit::TestCase
end;
end
+ def test_s_timeout_memory_leak
+ assert_no_memory_leak([], "#{<<~"begin;"}", "#{<<~"end;"}", "[Bug #20228]", rss: true)
+ Regexp.timeout = 0.001
+ regex = /^(a*)*$/
+ str = "a" * 1000000 + "x"
+
+ code = proc do
+ regex =~ str
+ rescue
+ end
+
+ 10.times(&code)
+ begin;
+ 1_000.times(&code)
+ end;
+ end
+
def per_instance_redos_test(global_timeout, per_instance_timeout, expected_timeout)
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
global_timeout = #{ EnvUtil.apply_timeout_scale(global_timeout).inspect }