|
11 | 11 | from pandas.compat.numpy import np_array_datetime64_compat
|
12 | 12 |
|
13 | 13 | from pandas.core.dtypes.common import (
|
| 14 | + is_categorical_dtype, |
14 | 15 | is_datetime64_dtype,
|
15 | 16 | is_datetime64tz_dtype,
|
16 | 17 | is_object_dtype,
|
@@ -802,62 +803,83 @@ def test_fillna(self):
|
802 | 803 | assert o is not result
|
803 | 804 |
|
804 | 805 | @pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
|
805 |
| - def test_memory_usage(self): |
806 |
| - for o in self.objs: |
807 |
| - res = o.memory_usage() |
808 |
| - res_deep = o.memory_usage(deep=True) |
809 |
| - |
810 |
| - if is_object_dtype(o) or ( |
811 |
| - isinstance(o, Series) and is_object_dtype(o.index) |
812 |
| - ): |
813 |
| - # if there are objects, only deep will pick them up |
814 |
| - assert res_deep > res |
815 |
| - else: |
816 |
| - assert res == res_deep |
817 |
| - |
818 |
| - if isinstance(o, Series): |
819 |
| - assert ( |
820 |
| - o.memory_usage(index=False) + o.index.memory_usage() |
821 |
| - ) == o.memory_usage(index=True) |
| 806 | + def test_memory_usage(self, index_or_series_obj): |
| 807 | + obj = index_or_series_obj |
| 808 | + res = obj.memory_usage() |
| 809 | + res_deep = obj.memory_usage(deep=True) |
822 | 810 |
|
823 |
| - # sys.getsizeof will call the .memory_usage with |
824 |
| - # deep=True, and add on some GC overhead |
825 |
| - diff = res_deep - sys.getsizeof(o) |
826 |
| - assert abs(diff) < 100 |
| 811 | + is_object = is_object_dtype(obj) or ( |
| 812 | + isinstance(obj, Series) and is_object_dtype(obj.index) |
| 813 | + ) |
| 814 | + is_categorical = is_categorical_dtype(obj) or ( |
| 815 | + isinstance(obj, Series) and is_categorical_dtype(obj.index) |
| 816 | + ) |
827 | 817 |
|
828 |
| - def test_searchsorted(self): |
829 |
| - # See gh-12238 |
830 |
| - for o in self.objs: |
831 |
| - index = np.searchsorted(o, max(o)) |
832 |
| - assert 0 <= index <= len(o) |
| 818 | + if len(obj) == 0: |
| 819 | + assert res_deep == res == 0 |
| 820 | + elif is_object or is_categorical: |
| 821 | + # only deep will pick them up |
| 822 | + assert res_deep > res |
| 823 | + else: |
| 824 | + assert res == res_deep |
833 | 825 |
|
834 |
| - index = np.searchsorted(o, max(o), sorter=range(len(o))) |
835 |
| - assert 0 <= index <= len(o) |
| 826 | + # sys.getsizeof will call the .memory_usage with |
| 827 | + # deep=True, and add on some GC overhead |
| 828 | + diff = res_deep - sys.getsizeof(obj) |
| 829 | + assert abs(diff) < 100 |
836 | 830 |
|
837 |
| - def test_validate_bool_args(self): |
838 |
| - invalid_values = [1, "True", [1, 2, 3], 5.0] |
| 831 | + def test_memory_usage_components_series(self, series_with_simple_index): |
| 832 | + series = series_with_simple_index |
| 833 | + total_usage = series.memory_usage(index=True) |
| 834 | + non_index_usage = series.memory_usage(index=False) |
| 835 | + index_usage = series.index.memory_usage() |
| 836 | + assert total_usage == non_index_usage + index_usage |
| 837 | + |
| 838 | + def test_memory_usage_components_narrow_series(self, narrow_series): |
| 839 | + series = narrow_series |
| 840 | + total_usage = series.memory_usage(index=True) |
| 841 | + non_index_usage = series.memory_usage(index=False) |
| 842 | + index_usage = series.index.memory_usage() |
| 843 | + assert total_usage == non_index_usage + index_usage |
| 844 | + |
| 845 | + def test_searchsorted(self, index_or_series_obj): |
| 846 | + # numpy.searchsorted calls obj.searchsorted under the hood. |
| 847 | + # See gh-12238 |
| 848 | + obj = index_or_series_obj |
839 | 849 |
|
840 |
| - for value in invalid_values: |
841 |
| - msg = "expected type bool" |
842 |
| - with pytest.raises(ValueError, match=msg): |
843 |
| - self.int_series.drop_duplicates(inplace=value) |
| 850 | + if isinstance(obj, pd.MultiIndex): |
| 851 | + # See gh-14833 |
| 852 | + pytest.skip("np.searchsorted doesn't work on pd.MultiIndex") |
844 | 853 |
|
845 |
| - def test_getitem(self): |
846 |
| - for i in self.indexes: |
847 |
| - s = pd.Series(i) |
| 854 | + max_obj = max(obj, default=0) |
| 855 | + index = np.searchsorted(obj, max_obj) |
| 856 | + assert 0 <= index <= len(obj) |
848 | 857 |
|
849 |
| - assert i[0] == s.iloc[0] |
850 |
| - assert i[5] == s.iloc[5] |
851 |
| - assert i[-1] == s.iloc[-1] |
| 858 | + index = np.searchsorted(obj, max_obj, sorter=range(len(obj))) |
| 859 | + assert 0 <= index <= len(obj) |
852 | 860 |
|
853 |
| - assert i[-1] == i[9] |
| 861 | + def test_access_by_position(self, indices): |
| 862 | + index = indices |
854 | 863 |
|
855 |
| - msg = "index 20 is out of bounds for axis 0 with size 10" |
856 |
| - with pytest.raises(IndexError, match=msg): |
857 |
| - i[20] |
858 |
| - msg = "single positional indexer is out-of-bounds" |
859 |
| - with pytest.raises(IndexError, match=msg): |
860 |
| - s.iloc[20] |
| 864 | + if len(index) == 0: |
| 865 | + pytest.skip("Test doesn't make sense on empty data") |
| 866 | + elif isinstance(index, pd.MultiIndex): |
| 867 | + pytest.skip("Can't instantiate Series from MultiIndex") |
| 868 | + |
| 869 | + series = pd.Series(index) |
| 870 | + assert index[0] == series.iloc[0] |
| 871 | + assert index[5] == series.iloc[5] |
| 872 | + assert index[-1] == series.iloc[-1] |
| 873 | + |
| 874 | + size = len(index) |
| 875 | + assert index[-1] == index[size - 1] |
| 876 | + |
| 877 | + msg = f"index {size} is out of bounds for axis 0 with size {size}" |
| 878 | + with pytest.raises(IndexError, match=msg): |
| 879 | + index[size] |
| 880 | + msg = "single positional indexer is out-of-bounds" |
| 881 | + with pytest.raises(IndexError, match=msg): |
| 882 | + series.iloc[size] |
861 | 883 |
|
862 | 884 | @pytest.mark.parametrize("indexer_klass", [list, pd.Index])
|
863 | 885 | @pytest.mark.parametrize(
|
|
0 commit comments