diff options
author | NARUSE, Yui <nurse@users.noreply.github.com> | 2024-03-20 22:40:50 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-20 13:40:50 +0000 |
commit | c626c201e4129bbea17583ecef73472c6f668c81 (patch) | |
tree | 04db541d0340a11e8ad842ee63adf30094ca1463 | |
parent | ce372be903e5f3597f1dc83cb558f165850b3bee (diff) |
merge revision(s) 01bfd1a2bf013a9ed92a9722ac5228187e05e6a8,1c120efe02d079b0a1dea573cf0fd7978d9cc857,31378dc0969f4466b2122d730b7298dd7004acdf: [Backport #20228] (#10301)
Fix memory leak in OnigRegion when match raises
[Bug #20228]
rb_reg_onig_match can raise a Regexp::TimeoutError, which would cause
the OnigRegion to leak.
Fix memory leak in stk_base when Regexp timeout
[Bug #20228]
If rb_reg_check_timeout raises a Regexp::TimeoutError, then the stk_base
will leak.
Add memory leak test for Regexp timeout
[Bug #20228]
-rw-r--r-- | re.c | 71 | ||||
-rw-r--r-- | regexec.c | 7 | ||||
-rw-r--r-- | regint.h | 20 | ||||
-rw-r--r-- | test/ruby/test_regexp.rb | 17 |
4 files changed, 92 insertions, 23 deletions
@@ -88,6 +88,9 @@ static const char casetable[] = { # error >>> "You lose. You will need a translation table for your character set." <<< #endif +// The process-global timeout for regexp matching +rb_hrtime_t rb_reg_match_time_limit = 0; + int rb_memcicmp(const void *x, const void *y, long len) { @@ -1732,6 +1735,23 @@ reg_onig_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_p ONIG_OPTION_NONE); } +struct rb_reg_onig_match_args { + VALUE re; + VALUE str; + struct reg_onig_search_args args; + struct re_registers regs; + + OnigPosition result; +}; + +static VALUE +rb_reg_onig_match_try(VALUE value_args) +{ + struct rb_reg_onig_match_args *args = (struct rb_reg_onig_match_args *)value_args; + args->result = rb_reg_onig_match(args->re, args->str, reg_onig_search, &args->args, &args->regs); + return Qnil; +} + /* returns byte offset */ static long rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match) @@ -1742,22 +1762,38 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back return -1; } - struct reg_onig_search_args args = { - .pos = pos, - .range = reverse ? 0 : len, + struct rb_reg_onig_match_args args = { + .re = re, + .str = str, + .args = { + .pos = pos, + .range = reverse ? 0 : len, + }, + .regs = {0} }; - struct re_registers regs = {0}; + /* If there is a timeout set, then rb_reg_onig_match could raise a + * Regexp::TimeoutError so we want to protect it from leaking memory. 
*/ + if (rb_reg_match_time_limit) { + int state; + rb_protect(rb_reg_onig_match_try, (VALUE)&args, &state); + if (state) { + onig_region_free(&args.regs, false); + rb_jump_tag(state); + } + } + else { + rb_reg_onig_match_try((VALUE)&args); + } - OnigPosition result = rb_reg_onig_match(re, str, reg_onig_search, &args, &regs); - if (result == ONIG_MISMATCH) { + if (args.result == ONIG_MISMATCH) { rb_backref_set(Qnil); return ONIG_MISMATCH; } VALUE match = match_alloc(rb_cMatch); rb_matchext_t *rm = RMATCH_EXT(match); - rm->regs = regs; + rm->regs = args.regs; if (set_backref_str) { RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str)); @@ -1774,7 +1810,7 @@ rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_back rb_backref_set(match); if (set_match) *set_match = match; - return result; + return args.result; } long @@ -4601,12 +4637,9 @@ re_warn(const char *s) rb_warn("%s", s); } -// The process-global timeout for regexp matching -rb_hrtime_t rb_reg_match_time_limit = 0; - // This function is periodically called during regexp matching -void -rb_reg_check_timeout(regex_t *reg, void *end_time_) +bool +rb_reg_timeout_p(regex_t *reg, void *end_time_) { rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_; @@ -4631,10 +4664,18 @@ rb_reg_check_timeout(regex_t *reg, void *end_time_) } else { if (*end_time < rb_hrtime_now()) { - // timeout is exceeded - rb_raise(rb_eRegexpTimeoutError, "regexp match timeout"); + // Timeout has exceeded + return true; } } + + return false; +} + +void +rb_reg_raise_timeout(void) +{ + rb_raise(rb_eRegexpTimeoutError, "regexp match timeout"); } /* @@ -2240,7 +2240,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, UChar *pkeep; char *alloca_base; char *xmalloc_base = NULL; - OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; + OnigStackType *stk_alloc, *stk_base = NULL, *stk, *stk_end; OnigStackType *stkp; /* used as any purpose. 
*/ OnigStackIndex si; OnigStackIndex *repeat_stk; @@ -4142,6 +4142,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_SAVE; xfree(xmalloc_base); return ONIGERR_UNEXPECTED_BYTECODE; + + timeout: + xfree(xmalloc_base); + xfree(stk_base); + HANDLE_REG_TIMEOUT_IN_MATCH_AT; } @@ -154,13 +154,18 @@ #ifdef RUBY # define CHECK_INTERRUPT_IN_MATCH_AT do { \ - msa->counter++; \ - if (msa->counter >= 128) { \ - msa->counter = 0; \ - rb_reg_check_timeout(reg, &msa->end_time); \ - rb_thread_check_ints(); \ - } \ + msa->counter++; \ + if (msa->counter >= 128) { \ + msa->counter = 0; \ + if (rb_reg_timeout_p(reg, &msa->end_time)) { \ + goto timeout; \ + } \ + rb_thread_check_ints(); \ + } \ } while(0) +# define HANDLE_REG_TIMEOUT_IN_MATCH_AT do { \ + rb_reg_raise_timeout(); \ +} while (0) # define onig_st_init_table st_init_table # define onig_st_init_table_with_size st_init_table_with_size # define onig_st_init_numtable st_init_numtable @@ -996,7 +1001,8 @@ extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, c #ifdef RUBY extern size_t onig_memsize(const regex_t *reg); extern size_t onig_region_memsize(const struct re_registers *regs); -void rb_reg_check_timeout(regex_t *reg, void *end_time); +bool rb_reg_timeout_p(regex_t *reg, void *end_time); +NORETURN(void rb_reg_raise_timeout(void)); #endif RUBY_SYMBOL_EXPORT_END diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 0d6ab4682d..4e04ccee69 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1807,6 +1807,23 @@ class TestRegexp < Test::Unit::TestCase end; end + def test_s_timeout_memory_leak + assert_no_memory_leak([], "#{<<~"begin;"}", "#{<<~"end;"}", "[Bug #20228]", rss: true) + Regexp.timeout = 0.001 + regex = /^(a*)*$/ + str = "a" * 1000000 + "x" + + code = proc do + regex =~ str + rescue + end + + 10.times(&code) + begin; + 1_000.times(&code) + end; + end + def per_instance_redos_test(global_timeout, per_instance_timeout, expected_timeout) 
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") global_timeout = #{ EnvUtil.apply_timeout_scale(global_timeout).inspect } |