python3-urllib3: fix CVE-2025-66418 CVE-2025-66471

References:
https://nvd.nist.gov/vuln/detail/CVE-2025-66418
https://nvd.nist.gov/vuln/detail/CVE-2025-66471

(From OE-Core rev: d9f52c5f86bcc4716e384fe5c01c03d386d60446)

Signed-off-by: Jiaying Song <jiaying.song.cn@windriver.com>
Signed-off-by: Steve Sakoman <steve@sakoman.com>
Jiaying Song 2025-12-17 16:26:33 +08:00 committed by Steve Sakoman
parent 67ac024a29
commit b9843e68be
3 changed files with 667 additions and 0 deletions

@@ -0,0 +1,80 @@
From 3bf7db860ef730e828b68264e88210190120cacf Mon Sep 17 00:00:00 2001
From: Illia Volochii <illia.volochii@gmail.com>
Date: Fri, 5 Dec 2025 16:41:33 +0200
Subject: [PATCH] Merge commit from fork
* Add a hard-coded limit for the decompression chain
* Reuse new list
CVE: CVE-2025-66418
Upstream-Status: Backport
[https://github.com/urllib3/urllib3/commit/24d7b67eac89f94e11003424bcf0d8f7b72222a8]
Signed-off-by: Jiaying Song <jiaying.song.cn@windriver.com>
---
changelog/GHSA-gm62-xv2j-4w53.security.rst | 4 ++++
src/urllib3/response.py | 12 +++++++++++-
test/test_response.py | 10 ++++++++++
3 files changed, 25 insertions(+), 1 deletion(-)
create mode 100644 changelog/GHSA-gm62-xv2j-4w53.security.rst
diff --git a/changelog/GHSA-gm62-xv2j-4w53.security.rst b/changelog/GHSA-gm62-xv2j-4w53.security.rst
new file mode 100644
index 00000000..6646eaa3
--- /dev/null
+++ b/changelog/GHSA-gm62-xv2j-4w53.security.rst
@@ -0,0 +1,4 @@
+Fixed a security issue where an attacker could compose an HTTP response with
+virtually unlimited links in the ``Content-Encoding`` header, potentially
+leading to a denial of service (DoS) attack by exhausting system resources
+during decoding. The number of allowed chained encodings is now limited to 5.
diff --git a/src/urllib3/response.py b/src/urllib3/response.py
index a0273d65..b8e8565c 100644
--- a/src/urllib3/response.py
+++ b/src/urllib3/response.py
@@ -194,8 +194,18 @@ class MultiDecoder(ContentDecoder):
they were applied.
"""
+ # Maximum allowed number of chained HTTP encodings in the
+ # Content-Encoding header.
+ max_decode_links = 5
+
def __init__(self, modes: str) -> None:
- self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]
+ encodings = [m.strip() for m in modes.split(",")]
+ if len(encodings) > self.max_decode_links:
+ raise DecodeError(
+ "Too many content encodings in the chain: "
+ f"{len(encodings)} > {self.max_decode_links}"
+ )
+ self._decoders = [_get_decoder(e) for e in encodings]
def flush(self) -> bytes:
return self._decoders[0].flush()
diff --git a/test/test_response.py b/test/test_response.py
index c0062771..0e8abd93 100644
--- a/test/test_response.py
+++ b/test/test_response.py
@@ -581,6 +581,16 @@ class TestResponse:
assert r.read(9 * 37) == b"foobarbaz" * 37
assert r.read() == b""
+ def test_read_multi_decoding_too_many_links(self) -> None:
+ fp = BytesIO(b"foo")
+ with pytest.raises(
+ DecodeError, match="Too many content encodings in the chain: 6 > 5"
+ ):
+ HTTPResponse(
+ fp,
+ headers={"content-encoding": "gzip, deflate, br, zstd, gzip, deflate"},
+ )
+
def test_body_blob(self) -> None:
resp = HTTPResponse(b"foo")
assert resp.data == b"foo"
--
2.34.1
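
With this patch applied, a response that advertises an over-long `Content-Encoding` chain is rejected with `DecodeError` before any decoder objects are built, instead of exhausting resources during decoding. A minimal sketch of the effect on downstream code (the URL is illustrative, not part of the patch):

    import urllib3
    from urllib3.exceptions import DecodeError

    http = urllib3.PoolManager()
    try:
        # A malicious server could reply with e.g. "gzip, gzip, gzip, ..."
        # repeated thousands of times; the patched MultiDecoder refuses
        # anything beyond 5 chained encodings.
        resp = http.request("GET", "https://example.com/")
        body = resp.data
    except DecodeError as exc:
        print(f"Refused to decode response: {exc}")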

@@ -0,0 +1,585 @@
From f25c0d11e1b640e3c7e0addb66a1ff50730be508 Mon Sep 17 00:00:00 2001
From: Illia Volochii <illia.volochii@gmail.com>
Date: Fri, 5 Dec 2025 16:40:41 +0200
Subject: [PATCH] Merge commit from fork
* Prevent decompression bomb for zstd in Python 3.14
* Add experimental `decompress_iter` for Brotli
* Update changes for Brotli
* Add `GzipDecoder.decompress_iter`
* Test https://github.com/python-hyper/brotlicffi/pull/207
* Pin Brotli
* Add `decompress_iter` to all decoders and make tests pass
* Pin brotlicffi to an official release
* Revert changes to response.py
* Add `max_length` parameter to all `decompress` methods
* Fix the `test_brotlipy` session
* Unset `_data` on gzip error
* Add a test for memory usage
* Test more methods
* Fix the test for `stream`
* Cover more lines with tests
* Add more coverage
* Make `read1` a bit more efficient
* Fix PyPy tests for Brotli
* Revert an unnecessarily moved check
* Add some comments
* Leave just one `self._obj.decompress` call in `GzipDecoder`
* Refactor test params
* Test reads with all data already in the decompressor
* Prevent needless copying of data decoded with `max_length`
* Rename the changed test
* Note that responses of unknown length should be streamed too
* Add a changelog entry
* Avoid returning a memory view from `BytesQueueBuffer`
* Add one more note to the changelog entry
CVE: CVE-2025-66471
Upstream-Status: Backport
[https://github.com/urllib3/urllib3/commit/c19571de34c47de3a766541b041637ba5f716ed7]
Signed-off-by: Jiaying Song <jiaying.song.cn@windriver.com>
---
docs/advanced-usage.rst | 3 +-
docs/user-guide.rst | 4 +-
pyproject.toml | 5 +-
src/urllib3/response.py | 278 ++++++++++++++++++++++++++++++++++------
4 files changed, 246 insertions(+), 44 deletions(-)
diff --git a/docs/advanced-usage.rst b/docs/advanced-usage.rst
index 36a51e67..a12c7143 100644
--- a/docs/advanced-usage.rst
+++ b/docs/advanced-usage.rst
@@ -66,7 +66,8 @@ When using ``preload_content=True`` (the default setting) the
response body will be read immediately into memory and the HTTP connection
will be released back into the pool without manual intervention.
-However, when dealing with large responses it's often better to stream the response
+However, when dealing with responses of large or unknown length,
+it's often better to stream the response
content using ``preload_content=False``. Setting ``preload_content`` to ``False`` means
that urllib3 will only read from the socket when data is requested.
diff --git a/docs/user-guide.rst b/docs/user-guide.rst
index 5c78c8af..1d9d0bbd 100644
--- a/docs/user-guide.rst
+++ b/docs/user-guide.rst
@@ -145,8 +145,8 @@ to a byte string representing the response content:
print(resp.data)
# b"\xaa\xa5H?\x95\xe9\x9b\x11"
-.. note:: For larger responses, it's sometimes better to :ref:`stream <stream>`
- the response.
+.. note:: For responses of large or unknown length, it's sometimes better to
+ :ref:`stream <stream>` the response.
Using io Wrappers with Response Content
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/pyproject.toml b/pyproject.toml
index 1fe82937..58a2c2db 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,8 +40,8 @@ dynamic = ["version"]
[project.optional-dependencies]
brotli = [
- "brotli>=1.0.9; platform_python_implementation == 'CPython'",
- "brotlicffi>=0.8.0; platform_python_implementation != 'CPython'"
+ "brotli>=1.2.0; platform_python_implementation == 'CPython'",
+ "brotlicffi>=1.2.0.0; platform_python_implementation != 'CPython'"
]
zstd = [
"zstandard>=0.18.0",
@@ -95,6 +95,7 @@ filterwarnings = [
'''default:ssl\.PROTOCOL_TLSv1_1 is deprecated:DeprecationWarning''',
'''default:ssl\.PROTOCOL_TLSv1_2 is deprecated:DeprecationWarning''',
'''default:ssl NPN is deprecated, use ALPN instead:DeprecationWarning''',
+ '''default:Brotli >= 1.2.0 is required to prevent decompression bombs\.:urllib3.exceptions.DependencyWarning''',
'''default:Async generator 'quart\.wrappers\.response\.DataBody\.__aiter__\.<locals>\._aiter' was garbage collected.*:ResourceWarning''', # https://github.com/pallets/quart/issues/301
'''default:unclosed file <_io\.BufferedWriter name='/dev/null'>:ResourceWarning''', # https://github.com/SeleniumHQ/selenium/issues/13328
]
diff --git a/src/urllib3/response.py b/src/urllib3/response.py
index b8e8565c..4304133e 100644
--- a/src/urllib3/response.py
+++ b/src/urllib3/response.py
@@ -49,6 +49,7 @@ from .connection import BaseSSLError, HTTPConnection, HTTPException
from .exceptions import (
BodyNotHttplibCompatible,
DecodeError,
+ DependencyWarning,
HTTPError,
IncompleteRead,
InvalidChunkLength,
@@ -68,7 +69,11 @@ log = logging.getLogger(__name__)
class ContentDecoder:
- def decompress(self, data: bytes) -> bytes:
+ def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+ raise NotImplementedError()
+
+ @property
+ def has_unconsumed_tail(self) -> bool:
raise NotImplementedError()
def flush(self) -> bytes:
@@ -78,30 +83,57 @@ class ContentDecoder:
class DeflateDecoder(ContentDecoder):
def __init__(self) -> None:
self._first_try = True
- self._data = b""
+ self._first_try_data = b""
+ self._unfed_data = b""
self._obj = zlib.decompressobj()
- def decompress(self, data: bytes) -> bytes:
- if not data:
+ def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+ data = self._unfed_data + data
+ self._unfed_data = b""
+ if not data and not self._obj.unconsumed_tail:
return data
+ original_max_length = max_length
+ if original_max_length < 0:
+ max_length = 0
+ elif original_max_length == 0:
+ # We should not pass 0 to the zlib decompressor because 0 is
+ # the default value that will make zlib decompress without a
+ # length limit.
+ # Data should be stored for subsequent calls.
+ self._unfed_data = data
+ return b""
+ # Subsequent calls always reuse `self._obj`. zlib requires
+ # passing the unconsumed tail if decompression is to continue.
if not self._first_try:
- return self._obj.decompress(data)
+ return self._obj.decompress(
+ self._obj.unconsumed_tail + data, max_length=max_length
+ )
- self._data += data
+ # First call tries with RFC 1950 ZLIB format.
+ self._first_try_data += data
try:
- decompressed = self._obj.decompress(data)
+ decompressed = self._obj.decompress(data, max_length=max_length)
if decompressed:
self._first_try = False
- self._data = None # type: ignore[assignment]
+ self._first_try_data = b""
return decompressed
+ # On failure, it falls back to RFC 1951 DEFLATE format.
except zlib.error:
self._first_try = False
self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
try:
- return self.decompress(self._data)
+ return self.decompress(
+ self._first_try_data, max_length=original_max_length
+ )
finally:
- self._data = None # type: ignore[assignment]
+ self._first_try_data = b""
+
+ @property
+ def has_unconsumed_tail(self) -> bool:
+ return bool(self._unfed_data) or (
+ bool(self._obj.unconsumed_tail) and not self._first_try
+ )
def flush(self) -> bytes:
return self._obj.flush()
@@ -117,27 +149,61 @@ class GzipDecoder(ContentDecoder):
def __init__(self) -> None:
self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
self._state = GzipDecoderState.FIRST_MEMBER
+ self._unconsumed_tail = b""
- def decompress(self, data: bytes) -> bytes:
+ def decompress(self, data: bytes, max_length: int = -1) -> bytes:
ret = bytearray()
- if self._state == GzipDecoderState.SWALLOW_DATA or not data:
+ if self._state == GzipDecoderState.SWALLOW_DATA:
return bytes(ret)
+
+ if max_length == 0:
+ # We should not pass 0 to the zlib decompressor because 0 is
+ # the default value that will make zlib decompress without a
+ # length limit.
+ # Data should be stored for subsequent calls.
+ self._unconsumed_tail += data
+ return b""
+
+ # zlib requires passing the unconsumed tail to the subsequent
+ # call if decompression is to continue.
+ data = self._unconsumed_tail + data
+ if not data and self._obj.eof:
+ return bytes(ret)
+
while True:
try:
- ret += self._obj.decompress(data)
+ ret += self._obj.decompress(
+ data, max_length=max(max_length - len(ret), 0)
+ )
except zlib.error:
previous_state = self._state
# Ignore data after the first error
self._state = GzipDecoderState.SWALLOW_DATA
+ self._unconsumed_tail = b""
if previous_state == GzipDecoderState.OTHER_MEMBERS:
# Allow trailing garbage acceptable in other gzip clients
return bytes(ret)
raise
- data = self._obj.unused_data
+
+ self._unconsumed_tail = data = (
+ self._obj.unconsumed_tail or self._obj.unused_data
+ )
+ if max_length > 0 and len(ret) >= max_length:
+ break
+
if not data:
return bytes(ret)
- self._state = GzipDecoderState.OTHER_MEMBERS
- self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+ # When the end of a gzip member is reached, a new decompressor
+ # must be created for unused (possibly future) data.
+ if self._obj.eof:
+ self._state = GzipDecoderState.OTHER_MEMBERS
+ self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+
+ return bytes(ret)
+
+ @property
+ def has_unconsumed_tail(self) -> bool:
+ return bool(self._unconsumed_tail)
def flush(self) -> bytes:
return self._obj.flush()
@@ -152,9 +218,35 @@ if brotli is not None:
def __init__(self) -> None:
self._obj = brotli.Decompressor()
if hasattr(self._obj, "decompress"):
- setattr(self, "decompress", self._obj.decompress)
+ setattr(self, "_decompress", self._obj.decompress)
else:
- setattr(self, "decompress", self._obj.process)
+ setattr(self, "_decompress", self._obj.process)
+
+ # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
+ def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
+ raise NotImplementedError()
+
+ def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+ try:
+ if max_length > 0:
+ return self._decompress(data, output_buffer_limit=max_length)
+ else:
+ return self._decompress(data)
+ except TypeError:
+ # Fallback for Brotli/brotlicffi/brotlipy versions without
+ # the `output_buffer_limit` parameter.
+ warnings.warn(
+ "Brotli >= 1.2.0 is required to prevent decompression bombs.",
+ DependencyWarning,
+ )
+ return self._decompress(data)
+
+ @property
+ def has_unconsumed_tail(self) -> bool:
+ try:
+ return not self._obj.can_accept_more_data()
+ except AttributeError:
+ return False
def flush(self) -> bytes:
if hasattr(self._obj, "flush"):
@@ -168,16 +260,46 @@ if HAS_ZSTD:
def __init__(self) -> None:
self._obj = zstd.ZstdDecompressor().decompressobj()
- def decompress(self, data: bytes) -> bytes:
- if not data:
+ def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+ if not data and not self.has_unconsumed_tail:
return b""
- data_parts = [self._obj.decompress(data)]
- while self._obj.eof and self._obj.unused_data:
+ if self._obj.eof:
+ data = self._obj.unused_data + data
+ self._obj = zstd.ZstdDecompressor()
+ part = self._obj.decompress(data, max_length=max_length)
+ length = len(part)
+ data_parts = [part]
+ # Every loop iteration is supposed to read data from a separate frame.
+ # The loop breaks when:
+ # - enough data is read;
+ # - no more unused data is available;
+ # - end of the last read frame has not been reached (i.e.,
+ # more data has to be fed).
+ while (
+ self._obj.eof
+ and self._obj.unused_data
+ and (max_length < 0 or length < max_length)
+ ):
unused_data = self._obj.unused_data
- self._obj = zstd.ZstdDecompressor().decompressobj()
- data_parts.append(self._obj.decompress(unused_data))
+ if not self._obj.needs_input:
+ self._obj = zstd.ZstdDecompressor()
+ part = self._obj.decompress(
+ unused_data,
+ max_length=(max_length - length) if max_length > 0 else -1,
+ )
+ if part_length := len(part):
+ data_parts.append(part)
+ length += part_length
+ elif self._obj.needs_input:
+ break
return b"".join(data_parts)
+ @property
+ def has_unconsumed_tail(self) -> bool:
+ return not (self._obj.needs_input or self._obj.eof) or bool(
+ self._obj.unused_data
+ )
+
def flush(self) -> bytes:
ret = self._obj.flush() # note: this is a no-op
if not self._obj.eof:
@@ -210,10 +332,35 @@ class MultiDecoder(ContentDecoder):
def flush(self) -> bytes:
return self._decoders[0].flush()
- def decompress(self, data: bytes) -> bytes:
- for d in reversed(self._decoders):
- data = d.decompress(data)
- return data
+ def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+ if max_length <= 0:
+ for d in reversed(self._decoders):
+ data = d.decompress(data)
+ return data
+
+ ret = bytearray()
+ # Every while loop iteration goes through all decoders once.
+ # It exits when enough data is read or no more data can be read.
+ # It is possible that the while loop iteration does not produce
+ # any data because we retrieve up to `max_length` from every
+ # decoder, and the amount of bytes may be insufficient for the
+ # next decoder to produce enough/any output.
+ while True:
+ any_data = False
+ for d in reversed(self._decoders):
+ data = d.decompress(data, max_length=max_length - len(ret))
+ if data:
+ any_data = True
+ # We should not break when no data is returned because
+ # next decoders may produce data even with empty input.
+ ret += data
+ if not any_data or len(ret) >= max_length:
+ return bytes(ret)
+ data = b""
+
+ @property
+ def has_unconsumed_tail(self) -> bool:
+ return any(d.has_unconsumed_tail for d in self._decoders)
def _get_decoder(mode: str) -> ContentDecoder:
@@ -246,9 +393,6 @@ class BytesQueueBuffer:
* self.buffer, which contains the full data
* the largest chunk that we will copy in get()
-
- The worst case scenario is a single chunk, in which case we'll make a full copy of
- the data inside get().
"""
def __init__(self) -> None:
@@ -270,6 +414,10 @@ class BytesQueueBuffer:
elif n < 0:
raise ValueError("n should be > 0")
+ if len(self.buffer[0]) == n and isinstance(self.buffer[0], bytes):
+ self._size -= n
+ return self.buffer.popleft()
+
fetched = 0
ret = io.BytesIO()
while fetched < n:
@@ -473,7 +621,11 @@ class BaseHTTPResponse(io.IOBase):
self._decoder = _get_decoder(content_encoding)
def _decode(
- self, data: bytes, decode_content: bool | None, flush_decoder: bool
+ self,
+ data: bytes,
+ decode_content: bool | None,
+ flush_decoder: bool,
+ max_length: int | None = None,
) -> bytes:
"""
Decode the data passed in and potentially flush the decoder.
@@ -486,9 +638,12 @@ class BaseHTTPResponse(io.IOBase):
)
return data
+ if max_length is None or flush_decoder:
+ max_length = -1
+
try:
if self._decoder:
- data = self._decoder.decompress(data)
+ data = self._decoder.decompress(data, max_length=max_length)
self._has_decoded_content = True
except self.DECODER_ERROR_CLASSES as e:
content_encoding = self.headers.get("content-encoding", "").lower()
@@ -953,6 +1108,14 @@ class HTTPResponse(BaseHTTPResponse):
elif amt is not None:
cache_content = False
+ if self._decoder and self._decoder.has_unconsumed_tail:
+ decoded_data = self._decode(
+ b"",
+ decode_content,
+ flush_decoder=False,
+ max_length=amt - len(self._decoded_buffer),
+ )
+ self._decoded_buffer.put(decoded_data)
if len(self._decoded_buffer) >= amt:
return self._decoded_buffer.get(amt)
@@ -960,7 +1123,11 @@ class HTTPResponse(BaseHTTPResponse):
flush_decoder = amt is None or (amt != 0 and not data)
- if not data and len(self._decoded_buffer) == 0:
+ if (
+ not data
+ and len(self._decoded_buffer) == 0
+ and not (self._decoder and self._decoder.has_unconsumed_tail)
+ ):
return data
if amt is None:
@@ -977,7 +1144,12 @@ class HTTPResponse(BaseHTTPResponse):
)
return data
- decoded_data = self._decode(data, decode_content, flush_decoder)
+ decoded_data = self._decode(
+ data,
+ decode_content,
+ flush_decoder,
+ max_length=amt - len(self._decoded_buffer),
+ )
self._decoded_buffer.put(decoded_data)
while len(self._decoded_buffer) < amt and data:
@@ -985,7 +1157,12 @@ class HTTPResponse(BaseHTTPResponse):
# For example, the GZ file header takes 10 bytes, we don't want to read
# it one byte at a time
data = self._raw_read(amt)
- decoded_data = self._decode(data, decode_content, flush_decoder)
+ decoded_data = self._decode(
+ data,
+ decode_content,
+ flush_decoder,
+ max_length=amt - len(self._decoded_buffer),
+ )
self._decoded_buffer.put(decoded_data)
data = self._decoded_buffer.get(amt)
@@ -1020,6 +1197,20 @@ class HTTPResponse(BaseHTTPResponse):
"Calling read1(decode_content=False) is not supported after "
"read1(decode_content=True) was called."
)
+ if (
+ self._decoder
+ and self._decoder.has_unconsumed_tail
+ and (amt is None or len(self._decoded_buffer) < amt)
+ ):
+ decoded_data = self._decode(
+ b"",
+ decode_content,
+ flush_decoder=False,
+ max_length=(
+ amt - len(self._decoded_buffer) if amt is not None else None
+ ),
+ )
+ self._decoded_buffer.put(decoded_data)
if len(self._decoded_buffer) > 0:
if amt is None:
return self._decoded_buffer.get_all()
@@ -1035,7 +1226,9 @@ class HTTPResponse(BaseHTTPResponse):
self._init_decoder()
while True:
flush_decoder = not data
- decoded_data = self._decode(data, decode_content, flush_decoder)
+ decoded_data = self._decode(
+ data, decode_content, flush_decoder, max_length=amt
+ )
self._decoded_buffer.put(decoded_data)
if decoded_data or flush_decoder:
break
@@ -1066,7 +1259,11 @@ class HTTPResponse(BaseHTTPResponse):
if self.chunked and self.supports_chunked_reads():
yield from self.read_chunked(amt, decode_content=decode_content)
else:
- while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
+ while (
+ not is_fp_closed(self._fp)
+ or len(self._decoded_buffer) > 0
+ or (self._decoder and self._decoder.has_unconsumed_tail)
+ ):
data = self.read(amt=amt, decode_content=decode_content)
if data:
@@ -1218,7 +1415,10 @@ class HTTPResponse(BaseHTTPResponse):
break
chunk = self._handle_chunk(amt)
decoded = self._decode(
- chunk, decode_content=decode_content, flush_decoder=False
+ chunk,
+ decode_content=decode_content,
+ flush_decoder=False,
+ max_length=amt,
)
if decoded:
yield decoded
--
2.34.1
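
The key mechanism in this patch is capping how much output a single decode call may produce, via zlib's `max_length` (and `output_buffer_limit` in Brotli >= 1.2.0), so that a small compressed body cannot expand without bound in one call. A standalone sketch of that mechanism using only the standard library (payload and buffer sizes are illustrative):

    import zlib

    # Highly compressible payload: ~10 MB of zeros shrinks to a few
    # kilobytes, the same shape as a decompression bomb.
    payload = zlib.compress(b"\x00" * 10_000_000)

    d = zlib.decompressobj()
    chunk = d.decompress(payload, 65536)  # at most 64 KiB of output per call
    total = len(chunk)

    # Input that could not be processed within the limit is kept in
    # unconsumed_tail; draining it with further bounded calls is the
    # pattern the patched decoders follow.
    while d.unconsumed_tail:
        chunk = d.decompress(d.unconsumed_tail, 65536)
        total += len(chunk)

    print(total)  # 10000000

On the consumer side, the documentation updates above recommend streaming responses of large or unknown length rather than preloading them. A minimal sketch of that pattern with the fixed urllib3 (URL and chunk size are illustrative):

    import urllib3

    http = urllib3.PoolManager()
    # preload_content=False leaves the body on the socket; each read(amt=...)
    # then decodes only a bounded amount of data at a time.
    resp = http.request(
        "GET", "https://example.com/large-file", preload_content=False
    )
    try:
        while True:
            chunk = resp.read(65536)
            if not chunk:
                break
            # process chunk ...
    finally:
        resp.release_conn()
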

@@ -9,6 +9,8 @@ inherit pypi python_hatchling
SRC_URI += " \
file://CVE-2025-50181.patch \
+file://CVE-2025-66418.patch \
+file://CVE-2025-66471.patch \
"
RDEPENDS:${PN} += "\