Skip to content

DOC: Fix examples in reshape #32980

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -283,14 +283,8 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
pytest -q --doctest-modules pandas/core/tools/datetimes.py
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Doctests top-level reshaping functions' ; echo $MSG
pytest -q --doctest-modules \
pandas/core/reshape/concat.py \
pandas/core/reshape/pivot.py \
pandas/core/reshape/reshape.py \
pandas/core/reshape/tile.py \
pandas/core/reshape/melt.py \
-k"-crosstab -pivot_table -cut"
MSG='Doctests reshaping functions' ; echo $MSG
pytest -q --doctest-modules pandas/core/reshape/
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Doctests interval classes' ; echo $MSG
Expand Down
128 changes: 87 additions & 41 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,14 @@ def merge_ordered(

Examples
--------
>>> A
>>> df1 = pd.DataFrame(
... {
... "key": ["a", "c", "e", "a", "c", "e"],
... "lvalue": [1, 2, 3, 1, 2, 3],
... "group": ["a", "a", "a", "b", "b", "b"]
... }
... )
>>> df1
key lvalue group
0 a 1 a
1 c 2 a
Expand All @@ -231,24 +238,25 @@ def merge_ordered(
4 c 2 b
5 e 3 b

>>> B
Key rvalue
0 b 1
1 c 2
2 d 3

>>> merge_ordered(A, B, fill_method='ffill', left_by='group')
group key lvalue rvalue
0 a a 1 NaN
1 a b 1 1.0
2 a c 2 2.0
3 a d 2 3.0
4 a e 3 3.0
5 b a 1 NaN
6 b b 1 1.0
7 b c 2 2.0
8 b d 2 3.0
9 b e 3 3.0
>>> df2 = pd.DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]})
>>> df2
key rvalue
0 b 1
1 c 2
2 d 3

>>> merge_ordered(df1, df2, fill_method="ffill", left_by="group")
key lvalue group rvalue
0 a 1 a NaN
1 b 1 a 1.0
2 c 2 a 2.0
3 d 2 a 3.0
4 e 3 a 3.0
5 a 1 b NaN
6 b 1 b 1.0
7 c 2 b 2.0
8 d 2 b 3.0
9 e 3 b 3.0
"""

def _merger(x, y):
Expand Down Expand Up @@ -369,15 +377,14 @@ def merge_asof(

Examples
--------
>>> left = pd.DataFrame({'a': [1, 5, 10], 'left_val': ['a', 'b', 'c']})
>>> left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
>>> left
a left_val
0 1 a
1 5 b
2 10 c

>>> right = pd.DataFrame({'a': [1, 2, 3, 6, 7],
... 'right_val': [1, 2, 3, 6, 7]})
>>> right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})
>>> right
a right_val
0 1 1
Expand All @@ -386,41 +393,40 @@ def merge_asof(
3 6 6
4 7 7

>>> pd.merge_asof(left, right, on='a')
>>> pd.merge_asof(left, right, on="a")
a left_val right_val
0 1 a 1
1 5 b 3
2 10 c 7

>>> pd.merge_asof(left, right, on='a', allow_exact_matches=False)
>>> pd.merge_asof(left, right, on="a", allow_exact_matches=False)
a left_val right_val
0 1 a NaN
1 5 b 3.0
2 10 c 7.0

>>> pd.merge_asof(left, right, on='a', direction='forward')
>>> pd.merge_asof(left, right, on="a", direction="forward")
a left_val right_val
0 1 a 1.0
1 5 b 6.0
2 10 c NaN

>>> pd.merge_asof(left, right, on='a', direction='nearest')
>>> pd.merge_asof(left, right, on="a", direction="nearest")
a left_val right_val
0 1 a 1
1 5 b 6
2 10 c 7

We can use indexed DataFrames as well.

>>> left = pd.DataFrame({'left_val': ['a', 'b', 'c']}, index=[1, 5, 10])
>>> left = pd.DataFrame({"left_val": ["a", "b", "c"]}, index=[1, 5, 10])
>>> left
left_val
1 a
5 b
10 c

>>> right = pd.DataFrame({'right_val': [1, 2, 3, 6, 7]},
... index=[1, 2, 3, 6, 7])
>>> right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7]}, index=[1, 2, 3, 6, 7])
>>> right
right_val
1 1
Expand All @@ -437,6 +443,32 @@ def merge_asof(

Here is a real-world times-series example

>>> quotes = pd.DataFrame(
... {
... "time": [
... pd.Timestamp("2016-05-25 13:30:00.023"),
... pd.Timestamp("2016-05-25 13:30:00.023"),
... pd.Timestamp("2016-05-25 13:30:00.030"),
... pd.Timestamp("2016-05-25 13:30:00.041"),
... pd.Timestamp("2016-05-25 13:30:00.048"),
... pd.Timestamp("2016-05-25 13:30:00.049"),
... pd.Timestamp("2016-05-25 13:30:00.072"),
... pd.Timestamp("2016-05-25 13:30:00.075")
... ],
... "ticker": [
... "GOOG",
... "MSFT",
... "MSFT",
... "MSFT",
... "GOOG",
... "AAPL",
... "GOOG",
... "MSFT"
... ],
... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],
... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03]
... }
... )
>>> quotes
time ticker bid ask
0 2016-05-25 13:30:00.023 GOOG 720.50 720.93
Expand All @@ -448,6 +480,20 @@ def merge_asof(
6 2016-05-25 13:30:00.072 GOOG 720.50 720.88
7 2016-05-25 13:30:00.075 MSFT 52.01 52.03

>>> trades = pd.DataFrame(
... {
... "time": [
... pd.Timestamp("2016-05-25 13:30:00.023"),
... pd.Timestamp("2016-05-25 13:30:00.038"),
... pd.Timestamp("2016-05-25 13:30:00.048"),
... pd.Timestamp("2016-05-25 13:30:00.048"),
... pd.Timestamp("2016-05-25 13:30:00.048")
... ],
... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
... "price": [51.95, 51.95, 720.77, 720.92, 98.0],
... "quantity": [75, 155, 100, 100, 100]
... }
... )
>>> trades
time ticker price quantity
0 2016-05-25 13:30:00.023 MSFT 51.95 75
Expand All @@ -458,9 +504,7 @@ def merge_asof(

By default we are taking the asof of the quotes

>>> pd.merge_asof(trades, quotes,
... on='time',
... by='ticker')
>>> pd.merge_asof(trades, quotes, on="time", by="ticker")
time ticker price quantity bid ask
0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96
1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98
Expand All @@ -470,10 +514,9 @@ def merge_asof(

We only asof within 2ms between the quote time and the trade time

>>> pd.merge_asof(trades, quotes,
... on='time',
... by='ticker',
... tolerance=pd.Timedelta('2ms'))
>>> pd.merge_asof(
... trades, quotes, on="time", by="ticker", tolerance=pd.Timedelta("2ms")
... )
time ticker price quantity bid ask
0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96
1 2016-05-25 13:30:00.038 MSFT 51.95 155 NaN NaN
Expand All @@ -485,11 +528,14 @@ def merge_asof(
and we exclude exact matches on time. However *prior* data will
propagate forward

>>> pd.merge_asof(trades, quotes,
... on='time',
... by='ticker',
... tolerance=pd.Timedelta('10ms'),
... allow_exact_matches=False)
>>> pd.merge_asof(
... trades,
... quotes,
... on="time",
... by="ticker",
... tolerance=pd.Timedelta("10ms"),
... allow_exact_matches=False
... )
time ticker price quantity bid ask
0 2016-05-25 13:30:00.023 MSFT 51.95 75 NaN NaN
1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98
Expand Down
26 changes: 14 additions & 12 deletions pandas/core/reshape/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,24 +171,26 @@ def cut(
... index=['a', 'b', 'c', 'd', 'e'])
>>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False)
... # doctest: +ELLIPSIS
(a 0.0
b 1.0
c 2.0
d 3.0
e 4.0
dtype: float64, array([0, 2, 4, 6, 8]))
(a 1.0
b 2.0
c 3.0
d 4.0
e NaN
dtype: float64,
array([ 0, 2, 4, 6, 8, 10]))

Use `drop` optional when bins is not unique

>>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True,
... right=False, duplicates='drop')
... # doctest: +ELLIPSIS
(a 0.0
b 1.0
c 2.0
(a 1.0
b 2.0
c 3.0
d 3.0
e 3.0
dtype: float64, array([0, 2, 4, 6, 8]))
e NaN
dtype: float64,
array([ 0, 2, 4, 6, 10]))

Passing an IntervalIndex for `bins` results in those categories exactly.
Notice that values not covered by the IntervalIndex are set to NaN. 0
Expand All @@ -197,7 +199,7 @@ def cut(

>>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)])
>>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins)
[NaN, (0, 1], NaN, (2, 3], (4, 5]]
[NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]]
Categories (3, interval[int64]): [(0, 1] < (2, 3] < (4, 5]]
"""
# NOTE: this binning code is changed a bit from histogram for var(x) == 0
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/reshape/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ def cartesian_product(X):
Examples
--------
>>> cartesian_product([list('ABC'), [1, 2]])
[array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'),
array([1, 2, 1, 2, 1, 2])]
[array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='<U1'), array([1, 2, 1, 2, 1, 2])]

See Also
--------
Expand Down