-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: fix issue with concat creating SparseFrame if not all series are sparse. #18924
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
ba8dc29
05a0717
6fc6369
4116da7
2ee0391
44aa41a
1b9e976
3c0a4da
90a5003
126db41
6d01387
81aba2e
0768990
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,10 @@ | ||
# pylint: disable-msg=E1101,W0612 | ||
import pytest | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import pandas.util.testing as tm | ||
import itertools | ||
|
||
|
||
class TestSparseSeriesConcat(object): | ||
|
@@ -317,37 +319,56 @@ def test_concat_axis1(self): | |
assert isinstance(res, pd.SparseDataFrame) | ||
tm.assert_frame_equal(res.to_dense(), exp) | ||
|
||
def test_concat_sparse_dense(self): | ||
sparse = self.dense1.to_sparse() | ||
|
||
res = pd.concat([sparse, self.dense2]) | ||
exp = pd.concat([self.dense1, self.dense2]) | ||
assert isinstance(res, pd.SparseDataFrame) | ||
tm.assert_frame_equal(res.to_dense(), exp) | ||
|
||
res = pd.concat([self.dense2, sparse]) | ||
exp = pd.concat([self.dense2, self.dense1]) | ||
assert isinstance(res, pd.SparseDataFrame) | ||
tm.assert_frame_equal(res.to_dense(), exp) | ||
|
||
sparse = self.dense1.to_sparse(fill_value=0) | ||
|
||
res = pd.concat([sparse, self.dense2]) | ||
exp = pd.concat([self.dense1, self.dense2]) | ||
assert isinstance(res, pd.SparseDataFrame) | ||
tm.assert_frame_equal(res.to_dense(), exp) | ||
|
||
res = pd.concat([self.dense2, sparse]) | ||
exp = pd.concat([self.dense2, self.dense1]) | ||
assert isinstance(res, pd.SparseDataFrame) | ||
tm.assert_frame_equal(res.to_dense(), exp) | ||
|
||
res = pd.concat([self.dense3, sparse], axis=1) | ||
exp = pd.concat([self.dense3, self.dense1], axis=1) | ||
assert isinstance(res, pd.SparseDataFrame) | ||
tm.assert_frame_equal(res, exp) | ||
|
||
res = pd.concat([sparse, self.dense3], axis=1) | ||
exp = pd.concat([self.dense1, self.dense3], axis=1) | ||
assert isinstance(res, pd.SparseDataFrame) | ||
tm.assert_frame_equal(res, exp) | ||
@pytest.mark.parametrize('fill_value,sparse_idx,dense_idx', | ||
itertools.product([None, 0, 1, np.nan], | ||
[0, 1], | ||
[1, 0])) | ||
def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx): | ||
frames = [self.dense1, self.dense2] | ||
sparse_frame = [frames[dense_idx], | ||
frames[sparse_idx].to_sparse(fill_value=fill_value)] | ||
dense_frame = [frames[dense_idx], frames[sparse_idx]] | ||
|
||
for _ in range(2): | ||
res = pd.concat(sparse_frame) | ||
exp = pd.concat(dense_frame) | ||
|
||
assert isinstance(res, pd.SparseDataFrame) | ||
tm.assert_frame_equal(res.to_dense(), exp) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a comment here (and in the test below) explaining the purpose of the reversals and the loop? |
||
sparse_frame = sparse_frame[::-1] | ||
dense_frame = dense_frame[::-1] | ||
|
||
@pytest.mark.parametrize('fill_value,sparse_idx,dense_idx', | ||
itertools.product([None, 0, 1, np.nan], | ||
[0, 1], | ||
[1, 0])) | ||
def test_concat_sparse_dense_cols(self, fill_value, sparse_idx, dense_idx): | ||
frames = [self.dense1, self.dense3] | ||
|
||
sparse_frame = [frames[dense_idx], | ||
frames[sparse_idx].to_sparse(fill_value=fill_value)] | ||
dense_frame = [frames[dense_idx], frames[sparse_idx]] | ||
|
||
for _ in range(2): | ||
res = pd.concat(sparse_frame, axis=1) | ||
exp = pd.concat(dense_frame, axis=1) | ||
|
||
# See GH18914 and #18686 for why this should be | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would remove these asserts (357-364); the assert on the frame/series is comprehensive. You can note the issue numbers at the top of the test instead. |
||
# A DataFrame | ||
assert type(res) is pd.DataFrame | ||
# See GH16874 | ||
assert not res.isnull().empty | ||
assert not res[res.columns[0]].empty | ||
assert res.iloc[0, 0] == exp.iloc[0, 0] | ||
|
||
for column in frames[dense_idx].columns: | ||
if dense_idx == sparse_idx: | ||
tm.assert_frame_equal(res[column], exp[column]) | ||
else: | ||
tm.assert_series_equal(res[column], exp[column]) | ||
|
||
tm.assert_frame_equal(res, exp) | ||
|
||
sparse_frame = sparse_frame[::-1] | ||
dense_frame = dense_frame[::-1] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you could put this right before you actually use SparseDataFrame