Skip to content

Commit e445cf3

Browse files
authored
Don't call rb_str_set_len while released the GVL. (#88)
* Only release the GVL where necessary. - Several string manipulation methods were invoked while the GVL was released. This is unsafe. - The mutex protecting multi-threaded access was not covering buffer state manipulation, leading to data corruption and out-of-bounds writes. - Using `rb_str_locktmp` prevents changes to buffer while it's in use. [Bug #20863]
1 parent 7bb6d98 commit e445cf3

File tree

2 files changed

+176
-118
lines changed

2 files changed

+176
-118
lines changed

‎ext/zlib/zlib.c‎

Lines changed: 174 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -674,9 +674,7 @@ zstream_expand_buffer(struct zstream *z)
674674
rb_obj_reveal(z->buf, rb_cString);
675675
}
676676

677-
rb_mutex_unlock(z->mutex);
678-
rb_protect(rb_yield, z->buf, &state);
679-
rb_mutex_lock(z->mutex);
677+
rb_protect(rb_yield, z->buf, &state);
680678

681679
if (ZSTREAM_REUSE_BUFFER_P(z)) {
682680
rb_str_modify(z->buf);
@@ -720,15 +718,14 @@ zstream_expand_buffer_into(struct zstream *z, unsigned long size)
720718
}
721719
}
722720

723-
static void *
724-
zstream_expand_buffer_protect(void *ptr)
721+
static int
722+
zstream_expand_buffer_protect(struct zstream *z)
725723
{
726-
struct zstream *z = (struct zstream *)ptr;
727724
int state = 0;
728725

729726
rb_protect((VALUE (*)(VALUE))zstream_expand_buffer, (VALUE)z, &state);
730727

731-
return (void *)(VALUE)state;
728+
return state;
732729
}
733730

734731
static int
@@ -1023,57 +1020,14 @@ zstream_ensure_end(VALUE v)
10231020
}
10241021

10251022
static void *
1026-
zstream_run_func(void *ptr)
1023+
zstream_run_once(void *_arguments)
10271024
{
1028-
struct zstream_run_args *args = (struct zstream_run_args *)ptr;
1029-
int err, state, flush = args->flush;
1030-
struct zstream *z = args->z;
1031-
uInt n;
1032-
1033-
err = Z_OK;
1034-
while (!args->interrupt) {
1035-
n = z->stream.avail_out;
1036-
err = z->func->run(&z->stream, flush);
1037-
rb_str_set_len(z->buf, ZSTREAM_BUF_FILLED(z) + (n - z->stream.avail_out));
1038-
1039-
if (err == Z_STREAM_END) {
1040-
z->flags &= ~ZSTREAM_FLAG_IN_STREAM;
1041-
z->flags |= ZSTREAM_FLAG_FINISHED;
1042-
break;
1043-
}
1044-
1045-
if (err != Z_OK && err != Z_BUF_ERROR)
1046-
break;
1047-
1048-
if (z->stream.avail_out > 0) {
1049-
z->flags |= ZSTREAM_FLAG_IN_STREAM;
1050-
break;
1051-
}
1052-
1053-
if (z->stream.avail_in == 0 && z->func == &inflate_funcs) {
1054-
/* break here because inflate() return Z_BUF_ERROR when avail_in == 0. */
1055-
/* but deflate() could be called with avail_in == 0 (there's hidden buffer
1056-
in zstream->state) */
1057-
z->flags |= ZSTREAM_FLAG_IN_STREAM;
1058-
break;
1059-
}
1060-
1061-
if (args->stream_output) {
1062-
state = (int)(VALUE)rb_thread_call_with_gvl(zstream_expand_buffer_protect,
1063-
(void *)z);
1064-
}
1065-
else {
1066-
state = zstream_expand_buffer_non_stream(z);
1067-
}
1025+
struct zstream_run_args *arguments = (struct zstream_run_args *)_arguments;
1026+
struct zstream *z = arguments->z;
10681027

1069-
if (state) {
1070-
err = Z_OK; /* buffer expanded but stream processing was stopped */
1071-
args->jump_state = state;
1072-
break;
1073-
}
1074-
}
1028+
uintptr_t error = z->func->run(&z->stream, arguments->flush);
10751029

1076-
return (void *)(VALUE)err;
1030+
return (void*)error;
10771031
}
10781032

10791033
/*
@@ -1088,6 +1042,86 @@ zstream_unblock_func(void *ptr)
10881042
args->interrupt = 1;
10891043
}
10901044

1045+
static VALUE
1046+
zstream_run_once_begin(VALUE _arguments)
1047+
{
1048+
struct zstream_run_args *arguments = (struct zstream_run_args *)_arguments;
1049+
struct zstream *z = arguments->z;
1050+
1051+
rb_str_locktmp(z->buf);
1052+
1053+
#ifndef RB_NOGVL_UBF_ASYNC_SAFE
1054+
return (VALUE)rb_thread_call_without_gvl(zstream_run_once, (void *)arguments, zstream_unblock_func, (void *)arguments);
1055+
#else
1056+
return (VALUE)rb_nogvl(zstream_run_once, (void *)arguments, zstream_unblock_func, (void *)arguments, RB_NOGVL_UBF_ASYNC_SAFE);
1057+
#endif
1058+
}
1059+
1060+
static VALUE
1061+
zstream_run_once_ensure(VALUE _arguments)
1062+
{
1063+
struct zstream_run_args *arguments = (struct zstream_run_args *)_arguments;
1064+
struct zstream *z = arguments->z;
1065+
1066+
rb_str_unlocktmp(z->buf);
1067+
1068+
return Qnil;
1069+
}
1070+
1071+
static int
1072+
zstream_run_func(struct zstream_run_args *args)
1073+
{
1074+
struct zstream *z = args->z;
1075+
int state;
1076+
uInt n;
1077+
1078+
int err = Z_OK;
1079+
while (!args->interrupt) {
1080+
n = z->stream.avail_out;
1081+
1082+
err = (int)(VALUE)rb_ensure(zstream_run_once_begin, (VALUE)args, zstream_run_once_ensure, (VALUE)args);
1083+
1084+
rb_str_set_len(z->buf, ZSTREAM_BUF_FILLED(z) + (n - z->stream.avail_out));
1085+
1086+
if (err == Z_STREAM_END) {
1087+
z->flags &= ~ZSTREAM_FLAG_IN_STREAM;
1088+
z->flags |= ZSTREAM_FLAG_FINISHED;
1089+
break;
1090+
}
1091+
1092+
if (err != Z_OK && err != Z_BUF_ERROR)
1093+
break;
1094+
1095+
if (z->stream.avail_out > 0) {
1096+
z->flags |= ZSTREAM_FLAG_IN_STREAM;
1097+
break;
1098+
}
1099+
1100+
if (z->stream.avail_in == 0 && z->func == &inflate_funcs) {
1101+
/* break here because inflate() return Z_BUF_ERROR when avail_in == 0. */
1102+
/* but deflate() could be called with avail_in == 0 (there's hidden buffer
1103+
in zstream->state) */
1104+
z->flags |= ZSTREAM_FLAG_IN_STREAM;
1105+
break;
1106+
}
1107+
1108+
if (args->stream_output) {
1109+
state = zstream_expand_buffer_protect(z);
1110+
}
1111+
else {
1112+
state = zstream_expand_buffer_non_stream(z);
1113+
}
1114+
1115+
if (state) {
1116+
err = Z_OK; /* buffer expanded but stream processing was stopped */
1117+
args->jump_state = state;
1118+
break;
1119+
}
1120+
}
1121+
1122+
return err;
1123+
}
1124+
10911125
static VALUE
10921126
zstream_run_try(VALUE value_arg)
10931127
{
@@ -1126,14 +1160,7 @@ zstream_run_try(VALUE value_arg)
11261160
}
11271161

11281162
loop:
1129-
#ifndef RB_NOGVL_UBF_ASYNC_SAFE
1130-
err = (int)(VALUE)rb_thread_call_without_gvl(zstream_run_func, (void *)args,
1131-
zstream_unblock_func, (void *)args);
1132-
#else
1133-
err = (int)(VALUE)rb_nogvl(zstream_run_func, (void *)args,
1134-
zstream_unblock_func, (void *)args,
1135-
RB_NOGVL_UBF_ASYNC_SAFE);
1136-
#endif
1163+
err = zstream_run_func(args);
11371164

11381165
/* retry if no exception is thrown */
11391166
if (err == Z_OK && args->interrupt) {
@@ -1184,7 +1211,6 @@ zstream_run_ensure(VALUE value_arg)
11841211

11851212
/* Remove ZSTREAM_IN_PROGRESS flag to signal that this zstream is not in use. */
11861213
z->flags &= ~ZSTREAM_IN_PROGRESS;
1187-
rb_mutex_unlock(z->mutex);
11881214

11891215
return Qnil;
11901216
}
@@ -1201,7 +1227,7 @@ zstream_run(struct zstream *z, Bytef *src, long len, int flush)
12011227
.jump_state = 0,
12021228
.stream_output = !ZSTREAM_IS_GZFILE(z) && rb_block_given_p(),
12031229
};
1204-
rb_mutex_lock(z->mutex);
1230+
12051231
rb_ensure(zstream_run_try, (VALUE)&args, zstream_run_ensure, (VALUE)&args);
12061232
if (args.jump_state)
12071233
rb_jump_tag(args.jump_state);
@@ -1770,6 +1796,22 @@ do_deflate(struct zstream *z, VALUE src, int flush)
17701796
}
17711797
}
17721798

1799+
struct rb_zlib_deflate_arguments {
1800+
struct zstream *z;
1801+
VALUE src;
1802+
int flush;
1803+
};
1804+
1805+
static VALUE
1806+
rb_deflate_deflate_body(VALUE args)
1807+
{
1808+
struct rb_zlib_deflate_arguments *arguments = (struct rb_zlib_deflate_arguments *)args;
1809+
1810+
do_deflate(arguments->z, arguments->src, arguments->flush);
1811+
1812+
return zstream_detach_buffer(arguments->z);
1813+
}
1814+
17731815
/*
17741816
* Document-method: Zlib::Deflate#deflate
17751817
*
@@ -1801,11 +1843,10 @@ rb_deflate_deflate(int argc, VALUE *argv, VALUE obj)
18011843
{
18021844
struct zstream *z = get_zstream(obj);
18031845
VALUE src, flush;
1804-
18051846
rb_scan_args(argc, argv, "11", &src, &flush);
1806-
do_deflate(z, src, ARG_FLUSH(flush));
1847+
struct rb_zlib_deflate_arguments arguments = {z, src, ARG_FLUSH(flush)};
18071848

1808-
return zstream_detach_buffer(z);
1849+
return rb_mutex_synchronize(z->mutex, rb_deflate_deflate_body, (VALUE)&arguments);
18091850
}
18101851

18111852
/*
@@ -2101,56 +2142,19 @@ rb_inflate_add_dictionary(VALUE obj, VALUE dictionary)
21012142
return obj;
21022143
}
21032144

2104-
/*
2105-
* Document-method: Zlib::Inflate#inflate
2106-
*
2107-
* call-seq:
2108-
* inflate(deflate_string, buffer: nil) -> String
2109-
* inflate(deflate_string, buffer: nil) { |chunk| ... } -> nil
2110-
*
2111-
* Inputs +deflate_string+ into the inflate stream and returns the output from
2112-
* the stream. Calling this method, both the input and the output buffer of
2113-
* the stream are flushed. If string is +nil+, this method finishes the
2114-
* stream, just like Zlib::ZStream#finish.
2115-
*
2116-
* If a block is given consecutive inflated chunks from the +deflate_string+
2117-
* are yielded to the block and +nil+ is returned.
2118-
*
2119-
* If a :buffer keyword argument is given and not nil:
2120-
*
2121-
* * The :buffer keyword should be a String, and will used as the output buffer.
2122-
* Using this option can reuse the memory required during inflation.
2123-
* * When not passing a block, the return value will be the same object as the
2124-
* :buffer keyword argument.
2125-
* * When passing a block, the yielded chunks will be the same value as the
2126-
* :buffer keyword argument.
2127-
*
2128-
* Raises a Zlib::NeedDict exception if a preset dictionary is needed to
2129-
* decompress. Set the dictionary by Zlib::Inflate#set_dictionary and then
2130-
* call this method again with an empty string to flush the stream:
2131-
*
2132-
* inflater = Zlib::Inflate.new
2133-
*
2134-
* begin
2135-
* out = inflater.inflate compressed
2136-
* rescue Zlib::NeedDict
2137-
* # ensure the dictionary matches the stream's required dictionary
2138-
* raise unless inflater.adler == Zlib.adler32(dictionary)
2139-
*
2140-
* inflater.set_dictionary dictionary
2141-
* inflater.inflate ''
2142-
* end
2143-
*
2144-
* # ...
2145-
*
2146-
* inflater.close
2147-
*
2148-
* See also Zlib::Inflate.new
2149-
*/
2145+
struct rb_zlib_inflate_arguments {
2146+
struct zstream *z;
2147+
int argc;
2148+
VALUE *argv;
2149+
};
2150+
21502151
static VALUE
2151-
rb_inflate_inflate(int argc, VALUE* argv, VALUE obj)
2152+
rb_inflate_inflate_body(VALUE _arguments)
21522153
{
2153-
struct zstream *z = get_zstream(obj);
2154+
struct rb_zlib_inflate_arguments *arguments = (struct rb_zlib_inflate_arguments*)_arguments;
2155+
struct zstream *z = arguments->z;
2156+
int argc = arguments->argc;
2157+
VALUE *argv = arguments->argv;
21542158
VALUE dst, src, opts, buffer = Qnil;
21552159

21562160
if (OPTHASH_GIVEN_P(opts)) {
@@ -2205,6 +2209,60 @@ rb_inflate_inflate(int argc, VALUE* argv, VALUE obj)
22052209
return dst;
22062210
}
22072211

2212+
/*
2213+
* Document-method: Zlib::Inflate#inflate
2214+
*
2215+
* call-seq:
2216+
* inflate(deflate_string, buffer: nil) -> String
2217+
* inflate(deflate_string, buffer: nil) { |chunk| ... } -> nil
2218+
*
2219+
* Inputs +deflate_string+ into the inflate stream and returns the output from
2220+
* the stream. Calling this method, both the input and the output buffer of
2221+
* the stream are flushed. If string is +nil+, this method finishes the
2222+
* stream, just like Zlib::ZStream#finish.
2223+
*
2224+
* If a block is given consecutive inflated chunks from the +deflate_string+
2225+
* are yielded to the block and +nil+ is returned.
2226+
*
2227+
* If a :buffer keyword argument is given and not nil:
2228+
*
2229+
* * The :buffer keyword should be a String, and will used as the output buffer.
2230+
* Using this option can reuse the memory required during inflation.
2231+
* * When not passing a block, the return value will be the same object as the
2232+
* :buffer keyword argument.
2233+
* * When passing a block, the yielded chunks will be the same value as the
2234+
* :buffer keyword argument.
2235+
*
2236+
* Raises a Zlib::NeedDict exception if a preset dictionary is needed to
2237+
* decompress. Set the dictionary by Zlib::Inflate#set_dictionary and then
2238+
* call this method again with an empty string to flush the stream:
2239+
*
2240+
* inflater = Zlib::Inflate.new
2241+
*
2242+
* begin
2243+
* out = inflater.inflate compressed
2244+
* rescue Zlib::NeedDict
2245+
* # ensure the dictionary matches the stream's required dictionary
2246+
* raise unless inflater.adler == Zlib.adler32(dictionary)
2247+
*
2248+
* inflater.set_dictionary dictionary
2249+
* inflater.inflate ''
2250+
* end
2251+
*
2252+
* # ...
2253+
*
2254+
* inflater.close
2255+
*
2256+
* See also Zlib::Inflate.new
2257+
*/
2258+
static VALUE
2259+
rb_inflate_inflate(int argc, VALUE* argv, VALUE obj)
2260+
{
2261+
struct zstream *z = get_zstream(obj);
2262+
struct rb_zlib_inflate_arguments arguments = {z, argc, argv};
2263+
return rb_mutex_synchronize(z->mutex, rb_inflate_inflate_body, (VALUE)&arguments);
2264+
}
2265+
22082266
/*
22092267
* call-seq: << string
22102268
*

‎test/zlib/test_zlib.rb‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ def test_recursive_deflate
545545
zd = Zlib::Deflate.new
546546

547547
s = SecureRandom.random_bytes(1024**2)
548-
assert_raise(Zlib::InProgressError) do
548+
assert_raise(ThreadError) do
549549
zd.deflate(s) do
550550
zd.deflate(s)
551551
end
@@ -563,7 +563,7 @@ def test_recursive_inflate
563563

564564
s = Zlib.deflate(SecureRandom.random_bytes(1024**2))
565565

566-
assert_raise(Zlib::InProgressError) do
566+
assert_raise(ThreadError) do
567567
zi.inflate(s) do
568568
zi.inflate(s)
569569
end

0 commit comments

Comments
 (0)