@@ -334,6 +334,133 @@ missing and interpolate over them:
334
334
335
335
ser.replace([1 , 2 , 3 ], method = ' pad' )
336
336
337
+ String/Regular Expression Replacement
338
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
339
+
340
+ .. note ::
341
+
342
+ Python strings prefixed with the ``r`` character such as ``r'hello world'``
343
+ are so-called "raw" strings. They have different semantics regarding
344
+ backslashes than strings without this prefix. Backslashes in raw strings
345
+ will be interpreted as an escaped backslash, e.g., ``r'\n' == '\\n'``. You
346
+ should `read about them
347
+ <http://docs.python.org/2/reference/lexical_analysis.html#string-literals>`_
348
+ if this is unclear.
349
+
350
+ Replace the '.' with ``nan `` (str -> str)
351
+
352
+ .. ipython :: python
353
+
354
+ from numpy.random import rand, randn
355
+ from numpy import nan
356
+ from pandas import DataFrame
357
+ d = {' a' : range (4 ), ' b' : list (' ab..' ), ' c' : [' a' , ' b' , nan, ' d' ]}
358
+ df = DataFrame(d)
359
+ df.replace(' .' , nan)
360
+
361
+ Now do it with a regular expression that removes surrounding whitespace
362
+ (regex -> regex)
363
+
364
+ .. ipython :: python
365
+
366
+ df.replace(r ' \s * \. \s * ' , nan, regex = True )
367
+
368
+ Replace a few different values (list -> list)
369
+
370
+ .. ipython :: python
371
+
372
+ df.replace([' a' , ' .' ], [' b' , nan])
373
+
374
+ list of regex -> list of regex
375
+
376
+ .. ipython :: python
377
+
378
+ df.replace([r ' \. ' , r ' ( a) ' ], [' dot' , r ' \1 stuff' ], regex = True )
379
+
380
+ Only search in column ``'b' `` (dict -> dict)
381
+
382
+ .. ipython :: python
383
+
384
+ df.replace({' b' : ' .' }, {' b' : nan})
385
+
386
+ Same as the previous example, but use a regular expression for
387
+ searching instead (dict of regex -> dict)
388
+
389
+ .. ipython :: python
390
+
391
+ df.replace({' b' : r ' \s * \. \s * ' }, {' b' : nan}, regex = True )
392
+
393
+ You can pass nested dictionaries of regular expressions that use ``regex=True ``
394
+
395
+ .. ipython :: python
396
+
397
+ df.replace({' b' : {' b' : r ' ' }}, regex = True )
398
+
399
+ or you can pass the nested dictionary like so
400
+
401
+ .. ipython :: python
402
+
403
+ df.replace(regex = {' b' : {' b' : r ' \s * \. \s * ' }})
404
+
405
+ You can also use the group of a regular expression match when replacing (dict
406
+ of regex -> dict of regex); this works for lists as well.
407
+
408
+ .. ipython :: python
409
+
410
+ df.replace({' b' : r ' \s * ( \. ) \s * ' }, {' b' : r ' \1 ty' }, regex = True )
411
+
412
+ You can pass a list of regular expressions, of which those that match
413
+ will be replaced with a scalar (list of regex -> regex)
414
+
415
+ .. ipython :: python
416
+
417
+ df.replace([r ' \s * \.\* ' , r ' a| b' ], nan, regex = True )
418
+
419
+ All of the regular expression examples can also be passed with the
420
+ ``to_replace `` argument as the ``regex `` argument. In this case the ``value ``
421
+ argument must be passed explicitly by name or ``regex`` must be a nested
422
+ dictionary. The previous example, in this case, would then be
423
+
424
+ .. ipython :: python
425
+
426
+ df.replace(regex = [r ' \s * \.\* ' , r ' a| b' ], value = nan)
427
+
428
+ This can be convenient if you do not want to pass ``regex=True `` every time you
429
+ want to use a regular expression.
430
+
431
+ .. note ::
432
+
433
+ Anywhere in the above ``replace`` examples that you see a regular expression,
434
+ a compiled regular expression is valid as well.
435
+
436
+ Numeric Replacement
437
+ ^^^^^^^^^^^^^^^^^^^
438
+
439
+ Similar to ``DataFrame.fillna``
440
+
441
+ .. ipython :: python
442
+
443
+ from numpy.random import rand, randn
444
+ from numpy import nan
445
+ from pandas import DataFrame
446
+ from pandas.util.testing import assert_frame_equal
447
+ df = DataFrame(randn(10 , 2 ))
448
+ df[rand(df.shape[0 ]) > 0.5 ] = 1.5
449
+ df.replace(1.5 , nan)
450
+
451
+ Replacing more than one value via lists works as well
452
+
453
+ .. ipython :: python
454
+
455
+ df00 = df.values[0 , 0 ]
456
+ df.replace([1.5 , df00], [nan, ' a' ])
457
+ df[1 ].dtype
458
+
459
+ You can also operate on the DataFrame in place
460
+
461
+ .. ipython :: python
462
+
463
+ df.replace(1.5 , nan, inplace = True )
337
464
338
465
Missing data casting rules and indexing
339
466
---------------------------------------
0 commit comments