Skip to content

Commit 8ece878

Browse files
fix(apm): fix unicode decode errors in pylons (#5029)
## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/contributing.html#Release-Note-Guidelines) are followed. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). This PR is related to a critical exception detected two days ago on https://app.datadoghq.com/logs <img width="1352" alt="Pylons Unicode Error" src="https://user-images.githubusercontent.com/114495376/216595049-a0fa6bb0-5c2b-4e5f-ac97-e83000c225eb.png"> Silently drop non translatable UTF-8 characters instead of raising critical exception in Pylons framework. ## Reviewer Checklist - [ ] Title is accurate. - [ ] No unnecessary changes are introduced. - [ ] Description motivates each change. - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [ ] Testing strategy adequately addresses listed risk(s). - [ ] Change is maintainable (easy to change, telemetry, documentation). - [ ] Release note makes sense to a user of the library.
1 parent ad35567 commit 8ece878

File tree

3 files changed

+62
-5
lines changed

3 files changed

+62
-5
lines changed

ddtrace/contrib/pylons/middleware.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,11 @@ def __call__(self, environ, start_response):
107107
if hasattr(request, "json"):
108108
req_body = request.json
109109
else:
110-
req_body = json.loads(request.body.decode("UTF-8"))
110+
req_body = json.loads(request.body.decode(request.charset or "utf-8", errors="ignore"))
111111
elif content_type in ("application/xml", "text/xml"):
112-
req_body = xmltodict.parse(request.body.decode("UTF-8"))
112+
req_body = xmltodict.parse(request.body.decode(request.charset or "utf-8", errors="ignore"))
113113
else: # text/plain, xml, others: take them as strings
114-
req_body = request.body.decode("UTF-8")
114+
req_body = request.body.decode(request.charset or "utf-8", errors="ignore")
115115

116116
except (
117117
AttributeError,
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
pylons: This fix resolves an issue where ``str.decode`` could cause critical unicode decode errors when ASM is enabled. ASM is disabled by default.

tests/contrib/pylons/test_pylons.py

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
# -*- coding: utf-8 -*-
12
import json
23
import logging
34
import os
45

6+
import mock
57
from paste import fixture
68
from paste.deploy import loadapp
79
import pylons
@@ -674,11 +676,60 @@ def test_pylons_body_xml(self):
674676
assert span
675677
assert span["mytestingbody_key"] == "mytestingbody_value"
676678

677-
def test_pylons_body_xml_attack(self):
679+
def test_pylons_body_json_unicode_decode_error_charset(self):
680+
# Regression test, if request.charset returns None, a TypeError is raised
681+
with self.override_global_config(dict(_appsec_enabled=True)):
682+
683+
with override_env(dict(DD_APPSEC_RULES=RULES_GOOD_PATH)), mock.patch(
684+
"ddtrace.contrib.pylons.middleware.Request.charset", new_callable=mock.PropertyMock
685+
) as mock_charset:
686+
mock_charset.return_value = None
687+
self.tracer._appsec_enabled = True
688+
# Hack: need to pass an argument to configure so that the processors are recreated
689+
self.tracer.configure(api_version="v0.4")
690+
self.app.post(
691+
url_for(controller="root", action="index"),
692+
params=b"\x80",
693+
extra_environ={"CONTENT_TYPE": "application/json"},
694+
)
695+
696+
spans = self.pop_spans()
697+
assert spans
698+
699+
root_span = spans[0]
700+
appsec_json = root_span.get_tag("_dd.appsec.json")
701+
assert appsec_json is None
702+
703+
assert "UnicodeDecodeError" not in self._caplog.text
704+
assert _context.get_item("http.request.body", span=root_span) is None
705+
706+
def test_pylons_body_json_unicode_decode_error(self):
707+
with self.override_global_config(dict(_appsec_enabled=True)):
708+
with override_env(dict(DD_APPSEC_RULES=RULES_GOOD_PATH)):
709+
self.tracer._appsec_enabled = True
710+
# Hack: need to pass an argument to configure so that the processors are recreated
711+
self.tracer.configure(api_version="v0.4")
712+
self.app.post(
713+
url_for(controller="root", action="index"),
714+
params=b"\x80",
715+
extra_environ={"CONTENT_TYPE": "application/json"},
716+
)
717+
718+
spans = self.pop_spans()
719+
assert spans
720+
721+
root_span = spans[0]
722+
appsec_json = root_span.get_tag("_dd.appsec.json")
723+
assert appsec_json is None
724+
725+
assert "UnicodeDecodeError" not in self._caplog.text
726+
assert _context.get_item("http.request.body", span=root_span) is None
727+
728+
def test_pylons_body_xml_attack_and_unicode_decode_error(self):
678729
with override_global_config(dict(_appsec_enabled=True)):
679730
# Hack: need to pass an argument to configure so that the processors are recreated
680731
self.tracer.configure(api_version="v0.4")
681-
payload = "<attack>1' or '1' = '1'</attack>"
732+
payload = b"\x80<attack>1' or '1' = '1'</attack>"
682733
self.app.post(
683734
url_for(controller="root", action="index"),
684735
params=payload,
@@ -690,6 +741,8 @@ def test_pylons_body_xml_attack(self):
690741

691742
root_span = spans[0]
692743
assert root_span
744+
745+
assert "UnicodeDecodeError" not in self._caplog.text
693746
assert root_span.get_tag("_dd.appsec.json") is None
694747

695748
span = dict(_context.get_item("http.request.body", span=root_span))

0 commit comments

Comments
 (0)