Skip to content

Commit 96594f1

Browse files
author
Shashwat
committed
Merge remote-tracking branch 'upstream/main' into doc-date-parser
2 parents 71466b6 + d800024 commit 96594f1

File tree

7 files changed

+57
-14
lines changed

7 files changed

+57
-14
lines changed

doc/source/whatsnew/v2.0.0.rst

+4-2
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
3838
* :func:`read_csv`
3939
* :func:`read_excel`
4040
* :func:`read_sql`
41+
* :func:`read_sql_query`
42+
* :func:`read_sql_table`
4143

4244
Additionally, a new global configuration, ``mode.nullable_backend``, can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
4345
to select the nullable dtypes implementation.
@@ -394,7 +396,7 @@ If installed, we now require:
394396
+-----------------+-----------------+----------+---------+
395397
| Package | Minimum Version | Required | Changed |
396398
+=================+=================+==========+=========+
397-
| mypy (dev) | 0.990 | | X |
399+
| mypy (dev) | 0.991 | | X |
398400
+-----------------+-----------------+----------+---------+
399401
| python-dateutil | 2.8.2 | X | X |
400402
+-----------------+-----------------+----------+---------+
@@ -880,7 +882,7 @@ I/O
880882
- Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`)
881883
- Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
882884
- Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`)
883-
-
885+
- Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`)
884886

885887
Period
886888
^^^^^^

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ dependencies:
8080
- flake8=6.0.0
8181
- flake8-bugbear=22.7.1 # used by flake8, find likely bugs
8282
- isort>=5.2.1 # check that imports are in the right order
83-
- mypy=0.990
83+
- mypy=0.991
8484
- pre-commit>=2.15.0
8585
- pycodestyle # used by flake8
8686
- pyupgrade

pandas/_libs/src/ujson/python/objToJSON.c

+11-2
Original file line numberDiff line numberDiff line change
@@ -332,9 +332,18 @@ static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc),
332332
return PyBytes_AS_STRING(obj);
333333
}
334334

335-
static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc),
335+
static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc,
336336
size_t *_outLen) {
337-
return (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen);
337+
char *encoded = (char *)PyUnicode_AsUTF8AndSize(_obj,
338+
(Py_ssize_t *)_outLen);
339+
if (encoded == NULL) {
340+
/* Something went wrong.
341+
Set errorMsg (to tell encoder to stop),
342+
and let Python exception propagate. */
343+
JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder;
344+
enc->errorMsg = "Encoding failed.";
345+
}
346+
return encoded;
338347
}
339348

340349
/* JSON callback. returns a char* and mutates the pointer to *len */

pandas/io/sql.py

+20
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ def read_sql_table(
224224
parse_dates: list[str] | dict[str, str] | None = ...,
225225
columns: list[str] | None = ...,
226226
chunksize: None = ...,
227+
use_nullable_dtypes: bool = ...,
227228
) -> DataFrame:
228229
...
229230

@@ -238,6 +239,7 @@ def read_sql_table(
238239
parse_dates: list[str] | dict[str, str] | None = ...,
239240
columns: list[str] | None = ...,
240241
chunksize: int = ...,
242+
use_nullable_dtypes: bool = ...,
241243
) -> Iterator[DataFrame]:
242244
...
243245

@@ -251,6 +253,7 @@ def read_sql_table(
251253
parse_dates: list[str] | dict[str, str] | None = None,
252254
columns: list[str] | None = None,
253255
chunksize: int | None = None,
256+
use_nullable_dtypes: bool = False,
254257
) -> DataFrame | Iterator[DataFrame]:
255258
"""
256259
Read SQL database table into a DataFrame.
@@ -287,6 +290,12 @@ def read_sql_table(
287290
chunksize : int, default None
288291
If specified, returns an iterator where `chunksize` is the number of
289292
rows to include in each chunk.
293+
use_nullable_dtypes : bool, default False
294+
Whether to use nullable dtypes as default when reading data. If
295+
set to True, nullable dtypes are used for all dtypes that have a nullable
296+
implementation, even if no nulls are present.
297+
298+
.. versionadded:: 2.0
290299
291300
Returns
292301
-------
@@ -318,6 +327,7 @@ def read_sql_table(
318327
parse_dates=parse_dates,
319328
columns=columns,
320329
chunksize=chunksize,
330+
use_nullable_dtypes=use_nullable_dtypes,
321331
)
322332

323333
if table is not None:
@@ -336,6 +346,7 @@ def read_sql_query(
336346
parse_dates: list[str] | dict[str, str] | None = ...,
337347
chunksize: None = ...,
338348
dtype: DtypeArg | None = ...,
349+
use_nullable_dtypes: bool = ...,
339350
) -> DataFrame:
340351
...
341352

@@ -350,6 +361,7 @@ def read_sql_query(
350361
parse_dates: list[str] | dict[str, str] | None = ...,
351362
chunksize: int = ...,
352363
dtype: DtypeArg | None = ...,
364+
use_nullable_dtypes: bool = ...,
353365
) -> Iterator[DataFrame]:
354366
...
355367

@@ -363,6 +375,7 @@ def read_sql_query(
363375
parse_dates: list[str] | dict[str, str] | None = None,
364376
chunksize: int | None = None,
365377
dtype: DtypeArg | None = None,
378+
use_nullable_dtypes: bool = False,
366379
) -> DataFrame | Iterator[DataFrame]:
367380
"""
368381
Read SQL query into a DataFrame.
@@ -406,6 +419,12 @@ def read_sql_query(
406419
{'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
407420
408421
.. versionadded:: 1.3.0
422+
use_nullable_dtypes : bool, default False
423+
Whether to use nullable dtypes as default when reading data. If
424+
set to True, nullable dtypes are used for all dtypes that have a nullable
425+
implementation, even if no nulls are present.
426+
427+
.. versionadded:: 2.0
409428
410429
Returns
411430
-------
@@ -430,6 +449,7 @@ def read_sql_query(
430449
parse_dates=parse_dates,
431450
chunksize=chunksize,
432451
dtype=dtype,
452+
use_nullable_dtypes=use_nullable_dtypes,
433453
)
434454

435455

pandas/tests/io/json/test_ujson.py

+9
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,15 @@ def test_encode_unicode_4bytes_utf8highest(self):
291291
assert enc == json.dumps(four_bytes_input)
292292
assert dec == json.loads(enc)
293293

294+
def test_encode_unicode_error(self):
295+
string = "'\udac0'"
296+
msg = (
297+
r"'utf-8' codec can't encode character '\\udac0' "
298+
r"in position 1: surrogates not allowed"
299+
)
300+
with pytest.raises(UnicodeEncodeError, match=msg):
301+
ujson.dumps([string])
302+
294303
def test_encode_array_in_array(self):
295304
arr_in_arr_input = [[[[]]]]
296305
output = ujson.encode(arr_in_arr_input)

pandas/tests/io/test_sql.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -2276,21 +2276,22 @@ def test_get_engine_auto_error_message(self):
22762276
pass
22772277
# TODO(GH#36893) fill this in when we add more engines
22782278

2279-
def test_read_sql_nullable_dtypes(self, string_storage):
2279+
@pytest.mark.parametrize("func", ["read_sql", "read_sql_query"])
2280+
def test_read_sql_nullable_dtypes(self, string_storage, func):
22802281
# GH#50048
22812282
table = "test"
22822283
df = self.nullable_data()
22832284
df.to_sql(table, self.conn, index=False, if_exists="replace")
22842285

22852286
with pd.option_context("mode.string_storage", string_storage):
2286-
result = pd.read_sql(
2287+
result = getattr(pd, func)(
22872288
f"Select * from {table}", self.conn, use_nullable_dtypes=True
22882289
)
22892290
expected = self.nullable_expected(string_storage)
22902291
tm.assert_frame_equal(result, expected)
22912292

22922293
with pd.option_context("mode.string_storage", string_storage):
2293-
iterator = pd.read_sql(
2294+
iterator = getattr(pd, func)(
22942295
f"Select * from {table}",
22952296
self.conn,
22962297
use_nullable_dtypes=True,
@@ -2300,20 +2301,21 @@ def test_read_sql_nullable_dtypes(self, string_storage):
23002301
for result in iterator:
23012302
tm.assert_frame_equal(result, expected)
23022303

2303-
def test_read_sql_nullable_dtypes_table(self, string_storage):
2304+
@pytest.mark.parametrize("func", ["read_sql", "read_sql_table"])
2305+
def test_read_sql_nullable_dtypes_table(self, string_storage, func):
23042306
# GH#50048
23052307
table = "test"
23062308
df = self.nullable_data()
23072309
df.to_sql(table, self.conn, index=False, if_exists="replace")
23082310

23092311
with pd.option_context("mode.string_storage", string_storage):
2310-
result = pd.read_sql(table, self.conn, use_nullable_dtypes=True)
2312+
result = getattr(pd, func)(table, self.conn, use_nullable_dtypes=True)
23112313
expected = self.nullable_expected(string_storage)
23122314
tm.assert_frame_equal(result, expected)
23132315

23142316
with pd.option_context("mode.string_storage", string_storage):
2315-
iterator = pd.read_sql(
2316-
f"Select * from {table}",
2317+
iterator = getattr(pd, func)(
2318+
table,
23172319
self.conn,
23182320
use_nullable_dtypes=True,
23192321
chunksize=3,
@@ -2463,7 +2465,8 @@ class Test(BaseModel):
24632465
def nullable_expected(self, storage) -> DataFrame:
24642466
return super().nullable_expected(storage).astype({"e": "Int64", "f": "Int64"})
24652467

2466-
def test_read_sql_nullable_dtypes_table(self, string_storage):
2468+
@pytest.mark.parametrize("func", ["read_sql", "read_sql_table"])
2469+
def test_read_sql_nullable_dtypes_table(self, string_storage, func):
24672470
# GH#50048 Not supported for sqlite
24682471
pass
24692472

requirements-dev.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ cpplint
5757
flake8==6.0.0
5858
flake8-bugbear==22.7.1
5959
isort>=5.2.1
60-
mypy==0.990
60+
mypy==0.991
6161
pre-commit>=2.15.0
6262
pycodestyle
6363
pyupgrade

0 commit comments

Comments
 (0)