Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-15-SP2:Update
python-gevent.30877
CVE-2023-41419-http-req-smuggle.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File CVE-2023-41419-http-req-smuggle.patch of Package python-gevent.30877
From 2f53c851eaf926767fbac62385615efd4886221c Mon Sep 17 00:00:00 2001 From: Jason Madden <jamadden@gmail.com> Date: Thu, 31 Aug 2023 17:05:48 -0500 Subject: [PATCH] gevent.pywsgi: Much improved handling of chunk trailers. Validation is much stricter to the specification. Fixes #1989 --- docs/changes/1989.bugfix | 26 +++++ src/gevent/pywsgi.py | 213 +++++++++++++++++++++++++++++++++++++---------- src/gevent/subprocess.py | 7 - 3 files changed, 201 insertions(+), 45 deletions(-) create mode 100644 docs/changes/1989.bugfix --- /dev/null +++ b/docs/changes/1989.bugfix @@ -0,0 +1,26 @@ +Make ``gevent.pywsgi`` comply more closely with the HTTP specification +for chunked transfer encoding. In particular, we are much stricter +about trailers, and trailers that are invalid (too long or featuring +disallowed characters) forcibly close the connection to the client +*after* the results have been sent. + +Trailers otherwise continue to be ignored and are not available to the +WSGI application. + +Previously, carefully crafted invalid trailers in chunked requests on +keep-alive connections might appear as two requests to +``gevent.pywsgi``. Because this was handled exactly as a normal +keep-alive connection with two requests, the WSGI application should +handle it normally. However, if you were counting on some upstream +server to filter incoming requests based on paths or header fields, +and the upstream server simply passed trailers through without +validating them, then this embedded second request would bypass those +checks. (If the upstream server validated that the trailers meet the +HTTP specification, this could not occur, because characters that are +required in an HTTP request, like a space, are not allowed in +trailers.) CVE-2023-41419 was reserved for this. + +Our thanks to the original reporters, Keran Mu +(mkr22@mails.tsinghua.edu.cn) and Jianjun Chen +(jianjun@tsinghua.edu.cn), from Tsinghua University and Zhongguancun +Laboratory. --- a/src/gevent/pywsgi.py +++ b/src/gevent/pywsgi.py @@ -8,6 +8,22 @@ WSGI work is handled by :class:`WSGIHand created for each request. The server can be customized to use different subclasses of :class:`WSGIHandler`. +This server is intended primarily for development and testing, +and secondarily for other "safe" scenarios where it will not be +exposed to potentially malicious input. The code has not been +security audited, and is not intended for direct exposure to the +public Internet. For production usage on the Internet, either +choose a production-strength server such as gunicorn, or put +a reverse proxy between gevent and the Internet. + +Complies more closely with the HTTP specification for chunked +transfer encoding. In particular, we are much stricter about +trailers, and trailers that are invalid (too long or featuring +disallowed characters) forcibly close the connection to the +client *after* the results have been sent. + +Trailers otherwise continue to be ignored and are not available +to the WSGI application. """ # FIXME: Can we refactor to make smallor? # pylint:disable=too-many-lines @@ -51,29 +67,47 @@ __all__ = [ MAX_REQUEST_LINE = 8192 # Weekday and month names for HTTP date/time formatting; always English! -_WEEKDAYNAME = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] -_MONTHNAME = [None, # Dummy so we can use 1-based month numbers +_WEEKDAYNAME = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun") +_MONTHNAME = (None, # Dummy so we can use 1-based month numbers "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec") # The contents of the "HEX" grammar rule for HTTP, upper and lowercase A-F plus digits, # in byte form for comparing to the network. _HEX = string.hexdigits.encode('ascii') +# The characters allowed in "token" rules. + +# token = 1*tchar +# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" +# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" +# / DIGIT / ALPHA +# ; any VCHAR, except delimiters +# ALPHA = %x41-5A / %x61-7A ; A-Z / a-z +_ALLOWED_TOKEN_CHARS = frozenset( + # Remember we have to be careful because bytestrings + # inexplicably iterate as integers, which are not equal to bytes. + + # explicit chars then DIGIT + (c.encode('ascii') for c in "!#$%&'*+-.^_`|~0123456789") + # Then we add ALPHA +) | {c.encode('ascii') for c in string.ascii_letters} +assert b'A' in _ALLOWED_TOKEN_CHARS + # Errors _ERRORS = dict() _INTERNAL_ERROR_STATUS = '500 Internal Server Error' _INTERNAL_ERROR_BODY = b'Internal Server Error' -_INTERNAL_ERROR_HEADERS = [('Content-Type', 'text/plain'), +_INTERNAL_ERROR_HEADERS = (('Content-Type', 'text/plain'), ('Connection', 'close'), - ('Content-Length', str(len(_INTERNAL_ERROR_BODY)))] + ('Content-Length', str(len(_INTERNAL_ERROR_BODY)))) _ERRORS[500] = (_INTERNAL_ERROR_STATUS, _INTERNAL_ERROR_HEADERS, _INTERNAL_ERROR_BODY) _BAD_REQUEST_STATUS = '400 Bad Request' _BAD_REQUEST_BODY = '' -_BAD_REQUEST_HEADERS = [('Content-Type', 'text/plain'), +_BAD_REQUEST_HEADERS = (('Content-Type', 'text/plain'), ('Connection', 'close'), - ('Content-Length', str(len(_BAD_REQUEST_BODY)))] + ('Content-Length', str(len(_BAD_REQUEST_BODY)))) _ERRORS[400] = (_BAD_REQUEST_STATUS, _BAD_REQUEST_HEADERS, _BAD_REQUEST_BODY) _REQUEST_TOO_LONG_RESPONSE = b"HTTP/1.1 414 Request URI Too Long\r\nConnection: close\r\nContent-length: 0\r\n\r\n" @@ -198,23 +232,32 @@ class Input(object): # Read and return the next integer chunk length. If no # chunk length can be read, raises _InvalidClientInput. - # Here's the production for a chunk: - # (http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html) - # chunk = chunk-size [ chunk-extension ] CRLF - # chunk-data CRLF - # chunk-size = 1*HEX - # chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) - # chunk-ext-name = token - # chunk-ext-val = token | quoted-string - - # To cope with malicious or broken clients that fail to send valid - # chunk lines, the strategy is to read character by character until we either reach - # a ; or newline. If at any time we read a non-HEX digit, we bail. If we hit a - # ;, indicating an chunk-extension, we'll read up to the next - # MAX_REQUEST_LINE characters - # looking for the CRLF, and if we don't find it, we bail. If we read more than 16 hex characters, - # (the number needed to represent a 64-bit chunk size), we bail (this protects us from - # a client that sends an infinite stream of `F`, for example). + # Here's the production for a chunk (actually the whole body): + # (https://www.rfc-editor.org/rfc/rfc7230#section-4.1) + + # chunked-body = *chunk + # last-chunk + # trailer-part + # CRLF + # + # chunk = chunk-size [ chunk-ext ] CRLF + # chunk-data CRLF + # chunk-size = 1*HEXDIG + # last-chunk = 1*("0") [ chunk-ext ] CRLF + # trailer-part = *( header-field CRLF ) + # chunk-data = 1*OCTET ; a sequence of chunk-size octets + + # To cope with malicious or broken clients that fail to send + # valid chunk lines, the strategy is to read character by + # character until we either reach a ; or newline. If at any + # time we read a non-HEX digit, we bail. If we hit a ;, + # indicating an chunk-extension, we'll read up to the next + # MAX_REQUEST_LINE characters ("A server ought to limit the + # total length of chunk extensions received") looking for the + # CRLF, and if we don't find it, we bail. If we read more than + # 16 hex characters, (the number needed to represent a 64-bit + # chunk size), we bail (this protects us from a client that + # sends an infinite stream of `F`, for example). buf = BytesIO() while 1: @@ -222,16 +265,20 @@ class Input(object): if not char: self._chunked_input_error = True raise _InvalidClientInput("EOF before chunk end reached") - if char == b'\r': - break - if char == b';': + + if char in ( + b'\r', # Beginning EOL + b';', # Beginning extension + ): break - if char not in _HEX: + if char not in _HEX: # Invalid data. self._chunked_input_error = True raise _InvalidClientInput("Non-hex data", char) + buf.write(char) - if buf.tell() > 16: + + if buf.tell() > 16: # Too many hex bytes self._chunked_input_error = True raise _InvalidClientInput("Chunk-size too large.") @@ -251,11 +298,72 @@ class Input(object): if char == b'\r': # We either got here from the main loop or from the # end of an extension + self.__read_chunk_size_crlf(rfile, newline_only=True) + result = int(buf.getvalue(), 16) + if result == 0: + # The only time a chunk size of zero is allowed is the final + # chunk. It is either followed by another \r\n, or some trailers + # which are then followed by \r\n. + while self.__read_chunk_trailer(rfile): + pass + return result + + # Trailers have the following production (they are a header-field followed by CRLF) + # See above for the definition of "token". + # + # header-field = field-name ":" OWS field-value OWS + # field-name = token + # field-value = *( field-content / obs-fold ) + # field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] + # field-vchar = VCHAR / obs-text + # obs-fold = CRLF 1*( SP / HTAB ) + # ; obsolete line folding + # ; see Section 3.2.4 + + + def __read_chunk_trailer(self, rfile, ): + # With rfile positioned just after a \r\n, read a trailer line. + # Return a true value if a non-empty trailer was read, and + # return false if an empty trailer was read (meaning the trailers are + # done). + # If a single line exceeds the MAX_REQUEST_LINE, raise an exception. + # If the field-name portion contains invalid characters, raise an exception. + + i = 0 + empty = True + seen_field_name = False + while i < MAX_REQUEST_LINE: + char = rfile.read(1) + if char == b'\r': + # Either read the next \n or raise an error. + self.__read_chunk_size_crlf(rfile, newline_only=True) + break + # Not a \r, so we are NOT an empty chunk. + empty = False + if char == b':' and i > 0: + # We're ending the field-name part; stop validating characters. + # Unless : was the first character... + seen_field_name = True + if not seen_field_name and char not in _ALLOWED_TOKEN_CHARS: + raise _InvalidClientInput('Invalid token character: %r' % (char,)) + i += 1 + else: + # We read too much + self._chunked_input_error = True + raise _InvalidClientInput("Too large chunk trailer") + return not empty + + def __read_chunk_size_crlf(self, rfile, newline_only=False): + # Also for safety, correctly verify that we get \r\n when expected. + if not newline_only: char = rfile.read(1) - if char != b'\n': + if char != b'\r': self._chunked_input_error = True - raise _InvalidClientInput("Line didn't end in CRLF") - return int(buf.getvalue(), 16) + raise _InvalidClientInput("Line didn't end in CRLF: %r" % (char,)) + char = rfile.read(1) + if char != b'\n': + self._chunked_input_error = True + raise _InvalidClientInput("Line didn't end in LF: %r" % (char,)) def _chunked_read(self, length=None, use_readline=False): # pylint:disable=too-many-branches @@ -291,7 +399,7 @@ class Input(object): self.position += datalen if self.chunk_length == self.position: - rfile.readline() + self.__read_chunk_size_crlf(rfile) if length is not None: length -= datalen @@ -304,9 +412,9 @@ class Input(object): # determine the next size to read self.chunk_length = self.__read_chunk_length(rfile) self.position = 0 - if self.chunk_length == 0: - # Last chunk. Terminates with a CRLF. - rfile.readline() + # If chunk_length was 0, we already read any trailers and + # validated that we have ended with \r\n\r\n. + return b''.join(response) def read(self, length=None): @@ -521,7 +629,8 @@ class WSGIHandler(object): elif len(words) == 2: self.command, self.path = words if self.command != "GET": - raise _InvalidClientRequest('Expected GET method: %r', raw_requestline) + raise _InvalidClientRequest('Expected GET method; Got command=%r; path=%r; raw=%r' % ( + self.command, self.path, raw_requestline,)) self.request_version = "HTTP/0.9" # QQQ I'm pretty sure we can drop support for HTTP/0.9 else: @@ -936,14 +1045,28 @@ class WSGIHandler(object): finally: try: self.wsgi_input._discard() + except _InvalidClientInput: + # This one is deliberately raised to the outer + # scope, because, with the incoming stream in some bad state, + # we can't be sure we can synchronize and properly parse the next + # request. + raise except (socket.error, IOError): - # Don't let exceptions during discarding + # Don't let socket exceptions during discarding # input override any exception that may have been # raised by the application, such as our own _InvalidClientInput. # In the general case, these aren't even worth logging (see the comment # just below) pass - except _InvalidClientInput: + except _InvalidClientInput as ex: + # DO log this one because: + # - Some of the data may have been read and acted on by the + # application; + # - The response may or may not have been sent; + # - It's likely that the client is bad, or malicious, and + # users might wish to take steps to block the client. + self._handle_client_error(ex) + self.close_connection = True self._send_error_response_if_possible(400) except socket.error as ex: if ex.args[0] in (errno.EPIPE, errno.ECONNRESET): @@ -994,16 +1117,22 @@ class WSGIHandler(object): def _handle_client_error(self, ex): # Called for invalid client input # Returns the appropriate error response. - if not isinstance(ex, ValueError): + if not isinstance(ex, (ValueError, _InvalidClientInput)): # XXX: Why not self._log_error to send it through the loop's # handle_error method? + # _InvalidClientRequest is a ValueError; + # _InvalidClientInput is an IOError. traceback.print_exc() if isinstance(ex, _InvalidClientRequest): # These come with good error messages, and we want to let # log_error deal with the formatting, especially to handle encoding - self.log_error(*ex.args) + # However, the error messages do not include the requesting IP + # necessarily, so we do add that. + self.log_error('(from %s) %s', self.client_address, ex.formatted_message) else: - self.log_error('Invalid request: %s', str(ex) or ex.__class__.__name__) + self.log_error('Invalid request (from %s): %s', + self.client_address, + str(ex) or ex.__class__.__name__) return ('400', _BAD_REQUEST_RESPONSE) def _headers(self): --- a/src/gevent/subprocess.py +++ b/src/gevent/subprocess.py @@ -280,10 +280,11 @@ def check_output(*popenargs, **kwargs): To capture standard error in the result, use ``stderr=STDOUT``:: - >>> check_output(["/bin/sh", "-c", + >>> output = check_output(["/bin/sh", "-c", ... "ls -l non_existent_file ; exit 0"], - ... stderr=STDOUT) - 'ls: non_existent_file: No such file or directory\n' + ... stderr=STDOUT).decode('ascii').strip() + >>> print(output.rsplit(':', 1)[1].strip()) + No such file or directory There is an additional optional argument, "input", allowing you to pass a string to the subprocess's stdin. If you use this argument
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor