Skip to content

Commit 35b98e3

Browse files
authored
bpo-43086: Add handling for out-of-spec data in a2b_base64 (GH-24402)
binascii.a2b_base64 gains a strict_mode= parameter. When enabled it will raise an error on input that deviates from the base64 spec in any way. The default remains False for backward compatibility. Code reviews and minor tweaks by: Gregory P. Smith <[email protected]> [Google]
1 parent b494685 commit 35b98e3

File tree

5 files changed

+132
-15
lines changed

5 files changed

+132
-15
lines changed

Doc/library/binascii.rst

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,23 @@ The :mod:`binascii` module defines the following functions:
5050
Added the *backtick* parameter.
5151

5252

53-
.. function:: a2b_base64(string)
53+
.. function:: a2b_base64(string, strict_mode=False)
5454

5555
Convert a block of base64 data back to binary and return the binary data. More
5656
than one line may be passed at a time.
5757

58+
If *strict_mode* is true, only valid base64 data will be converted. Invalid base64
59+
data will raise :exc:`binascii.Error`.
60+
61+
Valid base64:
62+
* Conforms to :rfc:`3548`.
63+
* Contains only characters from the base64 alphabet.
64+
* Contains no excess data after padding (including excess padding, newlines, etc.).
65+
* Does not start with padding.
66+
67+
.. versionchanged:: 3.11
68+
Added the *strict_mode* parameter.
69+
5870

5971
.. function:: b2a_base64(data, *, newline=True)
6072

Lib/test/test_binascii.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,47 @@ def addnoise(line):
114114
# empty strings. TBD: shouldn't it raise an exception instead ?
115115
self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), b'')
116116

117+
def test_base64_strict_mode(self):
118+
# Test base64 with strict mode on
119+
def _assertRegexTemplate(assert_regex: str, data: bytes, non_strict_mode_expected_result: bytes):
120+
with self.assertRaisesRegex(binascii.Error, assert_regex):
121+
binascii.a2b_base64(self.type2test(data), strict_mode=True)
122+
self.assertEqual(binascii.a2b_base64(self.type2test(data), strict_mode=False),
123+
non_strict_mode_expected_result)
124+
self.assertEqual(binascii.a2b_base64(self.type2test(data)),
125+
non_strict_mode_expected_result)
126+
127+
def assertExcessData(data, non_strict_mode_expected_result: bytes):
128+
_assertRegexTemplate(r'(?i)Excess data', data, non_strict_mode_expected_result)
129+
130+
def assertNonBase64Data(data, non_strict_mode_expected_result: bytes):
131+
_assertRegexTemplate(r'(?i)Only base64 data', data, non_strict_mode_expected_result)
132+
133+
def assertMalformedPadding(data, non_strict_mode_expected_result: bytes):
134+
_assertRegexTemplate(r'(?i)Leading padding', data, non_strict_mode_expected_result)
135+
136+
# Test excess data exceptions
137+
assertExcessData(b'ab==a', b'i')
138+
assertExcessData(b'ab===', b'i')
139+
assertExcessData(b'ab==:', b'i')
140+
assertExcessData(b'abc=a', b'i\xb7')
141+
assertExcessData(b'abc=:', b'i\xb7')
142+
assertExcessData(b'ab==\n', b'i')
143+
144+
# Test non-base64 data exceptions
145+
assertNonBase64Data(b'\nab==', b'i')
146+
assertNonBase64Data(b'ab:(){:|:&};:==', b'i')
147+
assertNonBase64Data(b'a\nb==', b'i')
148+
assertNonBase64Data(b'a\x00b==', b'i')
149+
150+
# Test malformed padding
151+
assertMalformedPadding(b'=', b'')
152+
assertMalformedPadding(b'==', b'')
153+
assertMalformedPadding(b'===', b'')
154+
assertMalformedPadding(b'ab=c=', b'i\xb7')
155+
assertMalformedPadding(b'ab=ab==', b'i\xb6\x9b')
156+
157+
117158
def test_base64errors(self):
118159
# Test base64 with invalid padding
119160
def assertIncorrectPadding(data):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Added a new optional :code:`strict_mode` parameter to *binascii.a2b_base64*.
2+
When :code:`strict_mode` is set to :code:`True`, the *a2b_base64* function will accept only valid base64 content.
3+
More details about what "valid base64 content" is can be found in the function's documentation.

Modules/binascii.c

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -433,18 +433,26 @@ binascii.a2b_base64
433433
434434
data: ascii_buffer
435435
/
436+
*
437+
strict_mode: bool(accept={int}) = False
436438
437439
Decode a line of base64 data.
440+
441+
strict_mode
442+
When set to True, bytes that are not part of the base64 standard are not allowed.
443+
The same applies to excess data after padding (= / ==).
438444
[clinic start generated code]*/
439445

440446
static PyObject *
441-
binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
442-
/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
447+
binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
448+
/*[clinic end generated code: output=5409557788d4f975 input=3a30c4e3528317c6]*/
443449
{
444450
assert(data->len >= 0);
445451

446452
const unsigned char *ascii_data = data->buf;
447453
size_t ascii_len = data->len;
454+
binascii_state *state = NULL;
455+
char padding_started = 0;
448456

449457
/* Allocate the buffer */
450458
Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
@@ -455,6 +463,15 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
455463
return NULL;
456464
unsigned char *bin_data_start = bin_data;
457465

466+
if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
467+
malformed_padding:
468+
state = get_binascii_state(module);
469+
if (state) {
470+
PyErr_SetString(state->Error, "Leading padding not allowed");
471+
}
472+
goto error_end;
473+
}
474+
458475
int quad_pos = 0;
459476
unsigned char leftchar = 0;
460477
int pads = 0;
@@ -465,20 +482,42 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
465482
** the invalid ones.
466483
*/
467484
if (this_ch == BASE64_PAD) {
485+
padding_started = 1;
486+
468487
if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
469-
/* A pad sequence means no more input.
470-
** We've already interpreted the data
471-
** from the quad at this point.
488+
/* A pad sequence means we should not parse more input.
489+
** We've already interpreted the data from the quad at this point.
490+
** In strict mode, an error is raised if there's excess data after the padding.
472491
*/
492+
if (strict_mode && i + 1 < ascii_len) {
493+
state = get_binascii_state(module);
494+
if (state) {
495+
PyErr_SetString(state->Error, "Excess data after padding");
496+
}
497+
goto error_end;
498+
}
499+
473500
goto done;
474501
}
475502
continue;
476503
}
477504

478505
this_ch = table_a2b_base64[this_ch];
479506
if (this_ch >= 64) {
507+
if (strict_mode) {
508+
state = get_binascii_state(module);
509+
if (state) {
510+
PyErr_SetString(state->Error, "Only base64 data is allowed");
511+
}
512+
goto error_end;
513+
}
480514
continue;
481515
}
516+
517+
// Characters that are not '=', in the middle of the padding, are not allowed
518+
if (strict_mode && padding_started) {
519+
goto malformed_padding;
520+
}
482521
pads = 0;
483522

484523
switch (quad_pos) {
@@ -505,7 +544,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
505544
}
506545

507546
if (quad_pos != 0) {
508-
binascii_state *state = get_binascii_state(module);
547+
state = get_binascii_state(module);
509548
if (state == NULL) {
510549
/* error already set, from get_binascii_state */
511550
} else if (quad_pos == 1) {
@@ -522,6 +561,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
522561
} else {
523562
PyErr_SetString(state->Error, "Incorrect padding");
524563
}
564+
error_end:
525565
_PyBytesWriter_Dealloc(&writer);
526566
return NULL;
527567
}

Modules/clinic/binascii.c.h

Lines changed: 29 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)