diff --git a/.travis.yml b/.travis.yml index dc37c42..d0c9a1b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,10 +8,12 @@ python: - "3.5" - "3.6" # command to install dependencies -install: +install: - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install unittest2; fi - "pip install dill" - "python setup.py install" # command to run tests -script: python tests.py +script: + - python tests.py + - python -m pytest sudo: false diff --git a/dev-requirements.txt b/dev-requirements.txt index 8aab0b6..0202432 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -4,3 +4,4 @@ coverage>=4.0.3 dill>=0.2.5 twine Sphinx +pytest diff --git a/pytest/conftest.py b/pytest/conftest.py new file mode 100644 index 0000000..9e6c8c8 --- /dev/null +++ b/pytest/conftest.py @@ -0,0 +1,18 @@ +import pytest + + +@pytest.fixture( + scope="module", + autouse=True, + params=["", None], + ids=["empty_is_blank", "empty_is_None"], +) +def empty_attribute_value(request): + """Run the entire test suite twice to vary CONSTANTS.empty_attribute_default. + + First run CONSTANTS.empty_attribute_default = "" + Second run CONSTANTS.empty_attribute_default = None + """ + from nameparser.config import CONSTANTS + + CONSTANTS.empty_attribute_default = request.param diff --git a/pytest/names/bare_names.json b/pytest/names/bare_names.json new file mode 100644 index 0000000..053040d --- /dev/null +++ b/pytest/names/bare_names.json @@ -0,0 +1,176 @@ +[ + "John Doe", + "John Doe, Jr.", + "John Doe III", + "Doe, John", + "Doe, John, Jr.", + "Doe, John III", + "John A. Doe", + "John A. Doe, Jr.", + "John A. Doe III", + "Doe, John A.", + "Doe, John A., Jr.", + "Doe, John A. III", + "John A. Kenneth Doe", + "John A. Kenneth Doe, Jr.", + "John A. Kenneth Doe III", + "Doe, John A. Kenneth", + "Doe, John A. Kenneth, Jr.", + "Doe, John A. Kenneth III", + "Dr. John Doe", + "Dr. John Doe, Jr.", + "Dr. John Doe III", + "Doe, Dr. John", + "Doe, Dr. John, Jr.", + "Doe, Dr. John III", + "Dr. John A. 
Doe", + "Dr. John A. Doe, Jr.", + "Dr. John A. Doe III", + "Doe, Dr. John A.", + "Doe, Dr. John A. Jr.", + "Doe, Dr. John A. III", + "Dr. John A. Kenneth Doe", + "Dr. John A. Kenneth Doe, Jr.", + "Dr. John A. Kenneth Doe III", + "Doe, Dr. John A. Kenneth", + "Doe, Dr. John A. Kenneth Jr.", + "Doe, Dr. John A. Kenneth III", + "Juan de la Vega", + "Juan de la Vega, Jr.", + "Juan de la Vega III", + "de la Vega, Juan", + "de la Vega, Juan, Jr.", + "de la Vega, Juan III", + "Juan Velasquez y Garcia", + "Juan Velasquez y Garcia, Jr.", + "Juan Velasquez y Garcia III", + "Velasquez y Garcia, Juan", + "Velasquez y Garcia, Juan, Jr.", + "Velasquez y Garcia, Juan III", + "Dr. Juan de la Vega", + "Dr. Juan de la Vega, Jr.", + "Dr. Juan de la Vega III", + "de la Vega, Dr. Juan", + "de la Vega, Dr. Juan, Jr.", + "de la Vega, Dr. Juan III", + "Dr. Juan Velasquez y Garcia", + "Dr. Juan Velasquez y Garcia, Jr.", + "Dr. Juan Velasquez y Garcia III", + "Velasquez y Garcia, Dr. Juan", + "Velasquez y Garcia, Dr. Juan, Jr.", + "Velasquez y Garcia, Dr. Juan III", + "Juan Q. de la Vega", + "Juan Q. de la Vega, Jr.", + "Juan Q. de la Vega III", + "de la Vega, Juan Q.", + "de la Vega, Juan Q., Jr.", + "de la Vega, Juan Q. III", + "Juan Q. Velasquez y Garcia", + "Juan Q. Velasquez y Garcia, Jr.", + "Juan Q. Velasquez y Garcia III", + "Velasquez y Garcia, Juan Q.", + "Velasquez y Garcia, Juan Q., Jr.", + "Velasquez y Garcia, Juan Q. III", + "Dr. Juan Q. de la Vega", + "Dr. Juan Q. de la Vega, Jr.", + "Dr. Juan Q. de la Vega III", + "de la Vega, Dr. Juan Q.", + "de la Vega, Dr. Juan Q., Jr.", + "de la Vega, Dr. Juan Q. III", + "Dr. Juan Q. Velasquez y Garcia", + "Dr. Juan Q. Velasquez y Garcia, Jr.", + "Dr. Juan Q. Velasquez y Garcia III", + "Velasquez y Garcia, Dr. Juan Q.", + "Velasquez y Garcia, Dr. Juan Q., Jr.", + "Velasquez y Garcia, Dr. Juan Q. III", + "Juan Q. Xavier de la Vega", + "Juan Q. Xavier de la Vega, Jr.", + "Juan Q. Xavier de la Vega III", + "de la Vega, Juan Q. 
Xavier", + "de la Vega, Juan Q. Xavier, Jr.", + "de la Vega, Juan Q. Xavier III", + "Juan Q. Xavier Velasquez y Garcia", + "Juan Q. Xavier Velasquez y Garcia, Jr.", + "Juan Q. Xavier Velasquez y Garcia III", + "Velasquez y Garcia, Juan Q. Xavier", + "Velasquez y Garcia, Juan Q. Xavier, Jr.", + "Velasquez y Garcia, Juan Q. Xavier III", + "Dr. Juan Q. Xavier de la Vega", + "Dr. Juan Q. Xavier de la Vega, Jr.", + "Dr. Juan Q. Xavier de la Vega III", + "de la Vega, Dr. Juan Q. Xavier", + "de la Vega, Dr. Juan Q. Xavier, Jr.", + "de la Vega, Dr. Juan Q. Xavier III", + "Dr. Juan Q. Xavier Velasquez y Garcia", + "Dr. Juan Q. Xavier Velasquez y Garcia, Jr.", + "Dr. Juan Q. Xavier Velasquez y Garcia III", + "Velasquez y Garcia, Dr. Juan Q. Xavier", + "Velasquez y Garcia, Dr. Juan Q. Xavier, Jr.", + "Velasquez y Garcia, Dr. Juan Q. Xavier III", + "John Doe, CLU, CFP, LUTC", + "John P. Doe, CLU, CFP, LUTC", + "Dr. John P. Doe-Ray, CLU, CFP, LUTC", + "Doe-Ray, Dr. John P., CLU, CFP, LUTC", + "Hon. Barrington P. Doe-Ray, Jr.", + "Doe-Ray, Hon. Barrington P. Jr.", + "Doe-Ray, Hon. Barrington P. Jr., CFP, LUTC", + "Jose Aznar y Lopez", + "John E Smith", + "John e Smith", + "John and Jane Smith", + "Rev. John A. Kenneth Doe", + "Donovan McNabb-Smith", + "Rev John A. Kenneth Doe", + "Doe, Rev. John A. Jr.", + "Buca di Beppo", + "Lt. Gen. John A. Kenneth Doe, Jr.", + "Doe, Lt. Gen. John A. Kenneth IV", + "Lt. Gen. John A. Kenneth Doe IV", + "Mr. and Mrs. John Smith", + "John Jones (Google Docs)", + "john e jones", + "john e jones, III", + "jones, john e", + "E.T. Smith", + "E.T. Smith, II", + "Smith, E.T., Jr.", + "A.B. Vajpayee", + "Rt. Hon. Paul E. Mary", + "Maid Marion", + "Amy E. Maid", + "Jane Doctor", + "Doctor, Jane E.", + "dr. ben alex johnson III", + "Lord of the Universe and Supreme King of the World Lisa Simpson", + "Benjamin (Ben) Franklin", + "Benjamin \"Ben\" Franklin", + "Brian O'connor", + "Sir Gerald", + "Magistrate Judge John F. 
Forster, Jr", + "Magistrate Judge Joaquin V.E. Manibusan, Jr", + "Magistrate-Judge Elizabeth Todd Campbell", + "Mag-Judge Harwell G Davis, III", + "Mag. Judge Byron G. Cudmore", + "Chief Judge J. Leon Holmes", + "Chief Judge Sharon Lovelace Blackburn", + "Judge James M. Moody", + "Judge G. Thomas Eisele", + "Judge Callie V. S. Granade", + "Judge C Lynwood Smith, Jr", + "Senior Judge Charles R. Butler, Jr", + "Senior Judge Harold D. Vietor", + "Senior Judge Virgil Pittman", + "Honorable Terry F. Moorer", + "Honorable W. Harold Albritton, III", + "Honorable Judge W. Harold Albritton, III", + "Honorable Judge Terry F. Moorer", + "Honorable Judge Susan Russ Walker", + "Hon. Marian W. Payson", + "Hon. Charles J. Siragusa", + "US Magistrate Judge T Michael Putnam", + "Designated Judge David A. Ezra", + "Sr US District Judge Richard G Kopf", + "U.S. District Judge Marc Thomas Treadwell", + "Dra. Andréia da Silva", + "Srta. Andréia da Silva" +] \ No newline at end of file diff --git a/pytest/names/brute_force.json b/pytest/names/brute_force.json new file mode 100644 index 0000000..458ef6d --- /dev/null +++ b/pytest/names/brute_force.json @@ -0,0 +1,805 @@ +[ + { + "raw": "John Doe", + "first": "John", + "last": "Doe" + }, + { + "raw": "John Doe, Jr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "John Doe III", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Doe, John", + "first": "John", + "last": "Doe" + }, + { + "raw": "Doe, John, Jr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Doe, John III", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "John A. Doe", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "John A. Doe, Jr", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr" + }, + { + "raw": "John A. 
Doe III", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "III" + }, + { + "raw": "Doe, John A.", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "Doe, John A., Jr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." + }, + { + "raw": "Doe, John A., III", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "III" + }, + { + "raw": "John A. Kenneth Doe", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth" + }, + { + "raw": "John A. Kenneth Doe, Jr.", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "Jr." + }, + { + "raw": "John A. Kenneth Doe III", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "III" + }, + { + "raw": "Doe, John. A. Kenneth", + "first": "John.", + "last": "Doe", + "middle": "A. Kenneth" + }, + { + "raw": "Doe, John. A. Kenneth, Jr.", + "first": "John.", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "Jr." + }, + { + "raw": "Doe, John. A. Kenneth III", + "first": "John.", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "III" + }, + { + "raw": "Dr. John Doe", + "first": "John", + "last": "Doe", + "title": "Dr." + }, + { + "raw": "Dr. John Doe, Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Dr. John Doe III", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Doe, Dr. John", + "title": "Dr.", + "first": "John", + "last": "Doe" + }, + { + "raw": "Doe, Dr. John, Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Doe, Dr. John III", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Dr. John A. Doe", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "Dr. John A. Doe, Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." + }, + { + "raw": "Dr. John A. 
Doe III", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "III" + }, + { + "raw": "Doe, Dr. John A.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "Doe, Dr. John A. Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." + }, + { + "raw": "Doe, Dr. John A. III", + "title": "Dr.", + "middle": "A.", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Dr. John A. Kenneth Doe", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe" + }, + { + "raw": "Dr. John A. Kenneth Doe, Jr.", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Al Arnold Gore, Jr.", + "middle": "Arnold", + "first": "Al", + "last": "Gore", + "suffix": "Jr." + }, + { + "raw": "Dr. John A. Kenneth Doe III", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Doe, Dr. John A. Kenneth", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe" + }, + { + "raw": "Doe, Dr. John A. Kenneth Jr.", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Doe, Dr. John A. Kenneth III", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Juan de la Vega", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "Juan de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "Juan de la Vega III", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "de la Vega, Juan", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Juan, Jr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." 
+ }, + { + "raw": "de la Vega, Juan III", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "Juan Velasquez y Garcia", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Juan Velasquez y Garcia, Jr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Juan Velasquez y Garcia III", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Juan", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Juan, Jr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Juan III", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Dr. Juan de la Vega", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "Dr. Juan de la Vega, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan de la Vega III", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "de la Vega, Dr. Juan", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Dr. Juan, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Dr. Juan III", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "Dr. Juan Velasquez y Garcia", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Dr. Juan Velasquez y Garcia, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Velasquez y Garcia III", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Dr. 
Juan", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Dr. Juan III", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Juan Q. de la Vega", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega" + }, + { + "raw": "Juan Q. de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "Jr." + }, + { + "raw": "Juan Q. de la Vega III", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "de la Vega, Juan Q.", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Juan Q., Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Juan Q. III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "III" + }, + { + "raw": "Juan Q. Velasquez y Garcia", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Juan Q. Velasquez y Garcia, Jr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Juan Q. Velasquez y Garcia III", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Juan Q.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Juan Q., Jr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Juan Q. III", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Dr. Juan Q. de la Vega", + "title": "Dr.", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega" + }, + { + "raw": "Dr. 
Juan Q. de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "title": "Dr.", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. de la Vega III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "title": "Dr.", + "suffix": "III" + }, + { + "raw": "de la Vega, Dr. Juan Q.", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega", + "title": "Dr." + }, + { + "raw": "de la Vega, Dr. Juan Q., Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "Jr.", + "title": "Dr." + }, + { + "raw": "de la Vega, Dr. Juan Q. III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "III", + "title": "Dr." + }, + { + "raw": "Dr. Juan Q. Velasquez y Garcia", + "title": "Dr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Dr. Juan Q. Velasquez y Garcia, Jr.", + "title": "Dr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. Velasquez y Garcia III", + "middle": "Q.", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q.", + "title": "Dr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q., Jr.", + "middle": "Q.", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. III", + "middle": "Q.", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Juan Q. Xavier de la Vega", + "first": "Juan", + "middle": "Q. Xavier", + "last": "de la Vega" + }, + { + "raw": "Juan Q. Xavier de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "Juan Q. Xavier de la Vega III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. 
Xavier", + "suffix": "III" + }, + { + "raw": "de la Vega, Juan Q. Xavier", + "first": "Juan", + "middle": "Q. Xavier", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Juan Q. Xavier, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Juan Q. Xavier III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "Dr. Juan Q. Xavier de la Vega", + "first": "Juan", + "middle": "Q. Xavier", + "title": "Dr.", + "last": "de la Vega" + }, + { + "raw": "Dr. Juan Q. Xavier de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. Xavier de la Vega III", + "first": "Juan", + "last": "de la Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "de la Vega, Dr. Juan Q. Xavier", + "first": "Juan", + "title": "Dr.", + "middle": "Q. Xavier", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Dr. Juan Q. Xavier, Jr.", + "first": "Juan", + "last": "de la Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Dr. Juan Q. Xavier III", + "first": "Juan", + "title": "Dr.", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "Juan Q. Xavier Velasquez y Garcia", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Juan Q. Xavier Velasquez y Garcia, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Juan Q. Xavier Velasquez y Garcia III", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Juan Q. Xavier", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Juan Q. Xavier, Jr.", + "middle": "Q. 
Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Juan Q. Xavier III", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Dr. Juan Q. Xavier Velasquez y Garcia", + "title": "Dr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Dr. Juan Q. Xavier Velasquez y Garcia, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. Xavier Velasquez y Garcia III", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. Xavier", + "title": "Dr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. Xavier, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. Xavier III", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "John Doe, CLU, CFP, LUTC", + "first": "John", + "last": "Doe", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "John P. Doe, CLU, CFP, LUTC", + "first": "John", + "middle": "P.", + "last": "Doe", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "Dr. John P. Doe-Ray, CLU, CFP, LUTC", + "first": "John", + "middle": "P.", + "last": "Doe-Ray", + "title": "Dr.", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "Doe-Ray, Dr. John P., CLU, CFP, LUTC", + "title": "Dr.", + "middle": "P.", + "first": "John", + "last": "Doe-Ray", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "Hon. Barrington P. Doe-Ray, Jr.", + "title": "Hon.", + "middle": "P.", + "first": "Barrington", + "last": "Doe-Ray", + "suffix": "Jr." + }, + { + "raw": "Doe-Ray, Hon. 
Barrington P. Jr., CFP, LUTC", + "title": "Hon.", + "middle": "P.", + "first": "Barrington", + "last": "Doe-Ray", + "suffix": "Jr., CFP, LUTC" + }, + { + "raw": "Rt. Hon. Paul E. Mary", + "title": "Rt. Hon.", + "first": "Paul", + "middle": "E.", + "last": "Mary" + }, + { + "raw": "Lord God Almighty", + "title": "Lord", + "first": "God", + "last": "Almighty" + } +] \ No newline at end of file diff --git a/pytest/names/capitalization.json b/pytest/names/capitalization.json new file mode 100644 index 0000000..90f07dd --- /dev/null +++ b/pytest/names/capitalization.json @@ -0,0 +1,59 @@ +[ + { + "id": "test_downcasing_mac", + "_note": "http://code.google.com/p/python-nameparser/issues/detail?id=15", + "raw": "RONALD MACDONALD", + "string": "Ronald MacDonald" + }, + { + "id": "test_downcasing_mc", + "_note": "http://code.google.com/p/python-nameparser/issues/detail?id=23", + "raw": "RONALD MCDONALD", + "string": "Ronald McDonald" + }, + { + "id": "test_capitalization_exception_for_III", + "raw": "juan q. xavier velasquez y garcia iii", + "string": "Juan Q. Xavier Velasquez y Garcia III" + }, + { + "id": "test_capitalize_title", + "raw": "lt. gen. john a. kenneth doe iv", + "string": "Lt. Gen. John A. Kenneth Doe IV" + }, + { + "id": "test_capitalize_title_to_lower", + "raw": "LT. GEN. JOHN A. KENNETH DOE IV", + "string": "Lt. Gen. John A. Kenneth Doe IV" + }, + { + "id": "test_capitalization_with_Mac_as_hyphenated_names", + "raw": "donovan mcnabb-smith", + "string": "Donovan McNabb-Smith" + }, + { + "id": "test_capitization_middle_initial_is_also_a_conjunction", + "raw": "scott e. werner", + "string": "Scott E. 
Werner" + }, + { + "id": "test_capitalize_diacritics", + "raw": "matthëus schmidt", + "string": "Matthëus Schmidt" + }, + { + "id": "test_short_names_with_mac", + "raw": "mack johnson", + "string": "Mack Johnson" + }, + { + "id": "test_portuguese_prefixes", + "raw": "joao da silva do amaral de souza", + "string": "Joao da Silva do Amaral de Souza" + }, + { + "id": "test_capitalize_prefix_clash_on_first_name", + "raw": "van nguyen", + "string": "Van Nguyen" + } +] \ No newline at end of file diff --git a/pytest/names/conjunction.json b/pytest/names/conjunction.json new file mode 100644 index 0000000..318adf3 --- /dev/null +++ b/pytest/names/conjunction.json @@ -0,0 +1,199 @@ +[ + { + "id": "test_last_name_with_conjunction", + "raw": "Jose Aznar y Lopez", + "first": "Jose", + "last": "Aznar y Lopez" + }, + { + "id": "test_multiple_conjunctions", + "raw": "part1 of The part2 of the part3 and part4", + "first": "part1 of The part2 of the part3 and part4" + }, + { + "id": "test_multiple_conjunctions2", + "raw": "part1 of and The part2 of the part3 And part4", + "first": "part1 of and The part2 of the part3 And part4" + }, + { + "id": "test_ends_with_conjunction", + "raw": "Jon Dough and", + "first": "Jon", + "last": "Dough and" + }, + { + "id": "test_ends_with_two_conjunctions", + "raw": "Jon Dough and of", + "first": "Jon", + "last": "Dough and of" + }, + { + "id": "test_starts_with_conjunction", + "raw": "and Jon Dough", + "first": "and Jon", + "last": "Dough" + }, + { + "id": "test_starts_with_two_conjunctions", + "raw": "the and Jon Dough", + "first": "the and Jon", + "last": "Dough" + }, + { + "id": "test_uppercase_middle_initial_conflict_with_conjunction_upper_means_initial", + "raw": "John E Smith", + "first": "John", + "middle": "E", + "last": "Smith" + }, + { + "id": "test_lowercase_middle_initial_with_period_conflict_with_conjunction", + "raw": "john e. 
smith", + "first": "john", + "middle": "e.", + "last": "smith" + }, + { + "id": "test_lowercase_first_initial_conflict_with_conjunction", + "raw": "e j smith", + "first": "e", + "middle": "j", + "last": "smith" + }, + { + "id": "test_lowercase_middle_initial_conflict_with_conjunction", + "raw": "John e Smith", + "first": "John", + "middle": "e", + "last": "Smith" + }, + { + "id": "test_lowercase_middle_initial_and_suffix_conflict_with_conjunction", + "raw": "John e Smith, III", + "first": "John", + "middle": "e", + "last": "Smith", + "suffix": "III" + }, + { + "id": "test_lowercase_middle_initial_and_nocomma_suffix_conflict_with_conjun", + "raw": "John e Smith III", + "first": "John", + "middle": "e", + "last": "Smith", + "suffix": "III" + }, + { + "id": "test_lowercase_middle_initial_comma_lastname_and_suffix_conflict_with_conjun", + "raw": "Smith, John e, III, Jr", + "first": "John", + "middle": "e", + "last": "Smith", + "suffix": "III, Jr" + }, + { + "id": "test_couples_names", + "raw": "John and Jane Smith", + "first": "John and Jane", + "last": "Smith" + }, + { + "id": "test_couples_names_with_conjunction_lastname", + "raw": "John and Jane Aznar y Lopez", + "first": "John and Jane", + "last": "Aznar y Lopez" + }, + { + "id": "test_couple_titles", + "raw": "Mr. and Mrs. John and Jane Smith", + "title": "Mr. 
and Mrs.", + "first": "John and Jane", + "last": "Smith" + }, + { + "id": "test_title_with_three_part_name_last_initial_is_suffix_uppercase_no_p", + "raw": "King John Alexander V", + "title": "King", + "first": "John", + "last": "Alexander", + "suffix": "V" + }, + { + "id": "test_four_name_parts_with_suffix_that_could_be_initial_lowercase_no_p", + "raw": "larry james edward johnson v", + "first": "larry", + "middle": "james edward", + "last": "johnson", + "suffix": "v" + }, + { + "id": "test_four_name_parts_with_suffix_that_could_be_initial_uppercase_no_p", + "raw": "Larry James Johnson I", + "first": "Larry", + "middle": "James", + "last": "Johnson", + "suffix": "I" + }, + { + "id": "test_roman_numeral_initials", + "raw": "Larry V I", + "first": "Larry", + "middle": "V", + "last": "I" + }, + { + "id": "test124_Rev_title", + "raw": "Rev. John A. Kenneth Doe", + "title": "Rev.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe" + }, + { + "id": "test125_Rev_title", + "raw": "Rev John A. Kenneth Doe", + "title": "Rev", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe" + }, + { + "id": "test126_Rev_title", + "raw": "Doe, Rev. John A. Jr.", + "title": "Rev.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." 
+ }, + { + "id": "test127", + "raw": "Buca di Beppo", + "first": "Buca", + "last": "di Beppo" + }, + { + "id": "test_le_as_last_name", + "raw": "Yin Le", + "first": "Yin", + "last": "Le" + }, + { + "id": "test_le_as_last_name_with_middle_initial", + "raw": "Yin a Le", + "first": "Yin", + "middle": "a", + "last": "Le" + }, + { + "id": "test_conjunction_in_an_address_with_a_title", + "raw": "His Excellency Lord Duncan", + "title": "His Excellency Lord", + "last": "Duncan" + }, + { + "id": "test_name_is_conjunctions", + "raw": "e and e", + "first": "e and e" + } +] \ No newline at end of file diff --git a/pytest/names/first_name.json b/pytest/names/first_name.json new file mode 100644 index 0000000..9ac13ba --- /dev/null +++ b/pytest/names/first_name.json @@ -0,0 +1,57 @@ +[ + { + "id": "test_first_name_is_not_prefix_if_only_two_parts", + "_note": "When there are only two parts, don't join prefixes or conjunctions", + "raw": "Van Nguyen", + "first": "Van", + "last": "Nguyen" + }, + { + "id": "test_first_name", + "raw": "Andrew", + "first": "Andrew" + }, + { + "id": "test_assume_title_and_one_other_name_is_last_name", + "raw": "Rev Andrews", + "title": "Rev", + "last": "Andrews" + }, + { + "id": "test_suffix_in_lastname_part_of_lastname_comma_format", + "raw": "Smith Jr., John", + "last": "Smith", + "first": "John", + "suffix": "Jr." 
+ }, + { + "id": "test_sir_exception_to_first_name_rule", + "raw": "Sir Gerald", + "title": "Sir", + "first": "Gerald" + }, + { + "id": "test_king_exception_to_first_name_rule", + "raw": "King Henry", + "title": "King", + "first": "Henry" + }, + { + "id": "test_queen_exception_to_first_name_rule", + "raw": "Queen Elizabeth", + "title": "Queen", + "first": "Elizabeth" + }, + { + "id": "test_dame_exception_to_first_name_rule", + "raw": "Dame Mary", + "title": "Dame", + "first": "Mary" + }, + { + "id": "test_first_name_is_not_prefix_if_only_two_parts_comma", + "raw": "Nguyen, Van", + "first": "Van", + "last": "Nguyen" + } +] \ No newline at end of file diff --git a/pytest/names/nickname.json b/pytest/names/nickname.json new file mode 100644 index 0000000..11e009b --- /dev/null +++ b/pytest/names/nickname.json @@ -0,0 +1,105 @@ +[ + { + "id": "test_nickname_in_parenthesis", + "_note": "https://code.google.com/p/python-nameparser/issues/detail?id=33", + "raw": "Benjamin (Ben) Franklin", + "first": "Benjamin", + "last": "Franklin", + "nickname": "Ben" + }, + { + "id": "test_two_word_nickname_in_parenthesis", + "raw": "Benjamin (Big Ben) Franklin", + "first": "Benjamin", + "last": "Franklin", + "nickname": "Big Ben" + }, + { + "id": "test_two_words_in_quotes", + "raw": "Benjamin \"Big Ben\" Franklin", + "first": "Benjamin", + "last": "Franklin", + "nickname": "Big Ben" + }, + { + "id": "test_nickname_in_parenthesis_with_comma", + "raw": "Franklin, Benjamin (Ben)", + "first": "Benjamin", + "last": "Franklin", + "nickname": "Ben" + }, + { + "id": "test_nickname_in_parenthesis_with_comma_and_suffix", + "raw": "Franklin, Benjamin (Ben), Jr.", + "first": "Benjamin", + "last": "Franklin", + "suffix": "Jr.", + "nickname": "Ben" + }, + { + "id": "test_nickname_in_single_quotes", + "raw": "Benjamin 'Ben' Franklin", + "first": "Benjamin", + "last": "Franklin", + "nickname": "Ben" + }, + { + "id": "test_nickname_in_double_quotes", + "raw": "Benjamin \"Ben\" Franklin", + "first": 
"Benjamin", + "last": "Franklin", + "nickname": "Ben" + }, + { + "id": "test_single_quotes_on_first_name_not_treated_as_nickname", + "raw": "Brian Andrew O'connor", + "first": "Brian", + "middle": "Andrew", + "last": "O'connor" + }, + { + "id": "test_single_quotes_on_both_name_not_treated_as_nickname", + "raw": "La'tanya O'connor", + "first": "La'tanya", + "last": "O'connor" + }, + { + "id": "test_single_quotes_on_end_of_last_name_not_treated_as_nickname", + "raw": "Mari' Aube'", + "first": "Mari'", + "last": "Aube'" + }, + { + "id": "test_okina_inside_name_not_treated_as_nickname", + "raw": "Harrieta Keōpūolani Nāhiʻenaʻena", + "first": "Harrieta", + "middle": "Keōpūolani", + "last": "Nāhiʻenaʻena" + }, + { + "id": "test_single_quotes_not_treated_as_nickname_Hawaiian_example", + "raw": "Harietta Keopuolani Nahi'ena'ena", + "first": "Harietta", + "middle": "Keopuolani", + "last": "Nahi'ena'ena" + }, + { + "id": "test_single_quotes_not_treated_as_nickname_Kenyan_example", + "raw": "Naomi Wambui Ng'ang'a", + "first": "Naomi", + "middle": "Wambui", + "last": "Ng'ang'a" + }, + { + "id": "test_single_quotes_not_treated_as_nickname_Samoan_example", + "raw": "Va'apu'u Vitale", + "first": "Va'apu'u", + "last": "Vitale" + }, + { + "id": "test_nickname_and_last_name", + "raw": "\"Rick\" Edmonds", + "last": "Edmonds", + "nickname": "Rick" + } +] \ No newline at end of file diff --git a/pytest/names/prefix.json b/pytest/names/prefix.json new file mode 100644 index 0000000..590c5ab --- /dev/null +++ b/pytest/names/prefix.json @@ -0,0 +1,118 @@ +[ + { + "id": "test_comma_two_part_last_name_with_suffix_in_first_part", + "_note": "I'm kinda surprised this works, not really sure if this is a realistic place for a suffix to be.", + "raw": "von bergen wessels MD, pennie", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "MD" + }, + { + "id": "test_prefix", + "raw": "Juan del Sur", + "first": "Juan", + "last": "del Sur" + }, + { + "id": "test_prefix_with_period", + 
"raw": "Jill St. John", + "first": "Jill", + "last": "St. John" + }, + { + "id": "test_prefix_before_two_part_last_name", + "raw": "pennie von bergen wessels", + "first": "pennie", + "last": "von bergen wessels" + }, + { + "id": "test_prefix_before_two_part_last_name_with_suffix", + "raw": "pennie von bergen wessels III", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "III" + }, + { + "id": "test_prefix_before_two_part_last_name_with_acronym_suffix", + "raw": "pennie von bergen wessels M.D.", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "M.D." + }, + { + "id": "test_two_part_last_name_with_suffix_comma", + "raw": "pennie von bergen wessels, III", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "III" + }, + { + "id": "test_two_part_last_name_with_suffix", + "raw": "von bergen wessels, pennie III", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "III" + }, + { + "id": "test_last_name_two_part_last_name_with_two_suffixes", + "raw": "von bergen wessels MD, pennie III", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "MD, III" + }, + { + "id": "test_comma_two_part_last_name_with_acronym_suffix", + "raw": "von bergen wessels, pennie MD", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "MD" + }, + { + "id": "test_title_two_part_last_name_with_suffix_in_first_part", + "raw": "pennie von bergen wessels MD, III", + "first": "pennie", + "last": "von bergen wessels", + "suffix": "MD, III" + }, + { + "id": "test_portuguese_dos", + "raw": "Rafael Sousa dos Anjos", + "first": "Rafael", + "middle": "Sousa", + "last": "dos Anjos" + }, + { + "id": "test_portuguese_prefixes", + "raw": "Joao da Silva do Amaral de Souza", + "first": "Joao", + "middle": "da Silva do Amaral", + "last": "de Souza" + }, + { + "id": "test_three_conjunctions", + "raw": "Dr. Juan Q. Xavier de la dos Vega III", + "first": "Juan", + "last": "de la dos Vega", + "title": "Dr.", + "middle": "Q. 
Xavier", + "suffix": "III" + }, + { + "id": "test_lastname_three_conjunctions", + "raw": "de la dos Vega, Dr. Juan Q. Xavier III", + "first": "Juan", + "last": "de la dos Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "id": "test_comma_three_conjunctions", + "raw": "Dr. Juan Q. Xavier de la dos Vega, III", + "first": "Juan", + "last": "de la dos Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "III" + } +] \ No newline at end of file diff --git a/pytest/names/suffix.json b/pytest/names/suffix.json new file mode 100644 index 0000000..af2f082 --- /dev/null +++ b/pytest/names/suffix.json @@ -0,0 +1,140 @@ +[ + { + "id": "test_two_suffixes", + "_note": "This adds a comma when the original format did not have one. Not ideal but at least its in the right bucket.", + "raw": "Kenneth Clarke QC MP", + "first": "Kenneth", + "last": "Clarke", + "suffix": "QC, MP" + }, + { + "id": "test_two_suffixes_lastname_comma_format", + "_note": "This adds a comma when the original format did not have one.", + "raw": "Washington Jr. MD, Franklin", + "first": "Franklin", + "last": "Washington", + "suffix": "Jr., MD" + }, + { + "id": "test_suffix", + "raw": "Joe Franklin Jr", + "first": "Joe", + "last": "Franklin", + "suffix": "Jr" + }, + { + "id": "test_suffix_with_periods", + "raw": "Joe Dentist D.D.S.", + "first": "Joe", + "last": "Dentist", + "suffix": "D.D.S." + }, + { + "id": "test_two_suffixes_suffix_comma_format", + "raw": "Franklin Washington, Jr. MD", + "first": "Franklin", + "last": "Washington", + "suffix": "Jr. MD" + }, + { + "id": "test_suffix_containing_periods", + "raw": "Kenneth Clarke Q.C.", + "first": "Kenneth", + "last": "Clarke", + "suffix": "Q.C." + }, + { + "id": "test_suffix_containing_periods_lastname_comma_format", + "raw": "Clarke, Kenneth, Q.C. M.P.", + "first": "Kenneth", + "last": "Clarke", + "suffix": "Q.C. M.P." 
+ }, + { + "id": "test_suffix_containing_periods_suffix_comma_format", + "raw": "Kenneth Clarke Q.C., M.P.", + "first": "Kenneth", + "last": "Clarke", + "suffix": "Q.C., M.P." + }, + { + "id": "test_suffix_with_single_comma_format", + "raw": "John Doe jr., MD", + "first": "John", + "last": "Doe", + "suffix": "jr., MD" + }, + { + "id": "test_suffix_with_double_comma_format", + "raw": "Doe, John jr., MD", + "first": "John", + "last": "Doe", + "suffix": "jr., MD" + }, + { + "id": "test_phd_with_erroneous_space", + "raw": "John Smith, Ph. D.", + "first": "John", + "last": "Smith", + "suffix": "Ph. D." + }, + { + "id": "test_phd_conflict", + "raw": "Adolph D", + "first": "Adolph", + "last": "D" + }, + { + "id": "test_potential_suffix_that_is_also_last_name_with_suffix", + "raw": "Jack Ma Jr", + "first": "Jack", + "last": "Ma", + "suffix": "Jr" + }, + { + "id": "test_potential_suffix_that_is_also_last_name_with_suffix_comma", + "raw": "Ma III, Jack Jr", + "first": "Jack", + "last": "Ma", + "suffix": "III, Jr" + }, + { + "id": "test_potential_suffix_that_is_also_last_name", + "raw": "Jack Ma", + "first": "Jack", + "last": "Ma" + }, + { + "id": "test_potential_suffix_that_is_also_last_name_comma", + "raw": "Ma, Jack", + "first": "Jack", + "last": "Ma" + }, + { + "id": "test_potential_suffix_that_is_also_first_name_comma", + "raw": "Johnson, Bart", + "first": "Bart", + "last": "Johnson" + }, + { + "id": "test_multiple_letter_suffix_with_periods", + "raw": "John Doe Msc.Ed.", + "first": "John", + "last": "Doe", + "suffix": "Msc.Ed." + }, + { + "id": "test_suffix_with_periods_with_comma", + "raw": "John Doe, Msc.Ed.", + "first": "John", + "last": "Doe", + "suffix": "Msc.Ed." + }, + { + "id": "test_suffix_with_periods_with_lastname_comma", + "raw": "Doe, John Msc.Ed.", + "first": "John", + "last": "Doe", + "suffix": "Msc.Ed." 
+ } +] \ No newline at end of file diff --git a/pytest/names/test_bank.json b/pytest/names/test_bank.json new file mode 100644 index 0000000..c9a3538 --- /dev/null +++ b/pytest/names/test_bank.json @@ -0,0 +1,983 @@ +{ + "singular_test_names": [ + "John Doe", + "John Doe, Jr.", + "John Doe III", + "Doe, John", + "Doe, John, Jr.", + "Doe, John III", + "John A. Doe", + "John A. Doe, Jr.", + "John A. Doe III", + "Doe, John A.", + "Doe, John A., Jr.", + "Doe, John A. III", + "John A. Kenneth Doe", + "John A. Kenneth Doe, Jr.", + "John A. Kenneth Doe III", + "Doe, John A. Kenneth", + "Doe, John A. Kenneth, Jr.", + "Doe, John A. Kenneth III", + "Dr. John Doe", + "Dr. John Doe, Jr.", + "Dr. John Doe III", + "Doe, Dr. John", + "Doe, Dr. John, Jr.", + "Doe, Dr. John III", + "Dr. John A. Doe", + "Dr. John A. Doe, Jr.", + "Dr. John A. Doe III", + "Doe, Dr. John A.", + "Doe, Dr. John A. Jr.", + "Doe, Dr. John A. III", + "Dr. John A. Kenneth Doe", + "Dr. John A. Kenneth Doe, Jr.", + "Dr. John A. Kenneth Doe III", + "Doe, Dr. John A. Kenneth", + "Doe, Dr. John A. Kenneth Jr.", + "Doe, Dr. John A. Kenneth III", + "Juan de la Vega", + "Juan de la Vega, Jr.", + "Juan de la Vega III", + "de la Vega, Juan", + "de la Vega, Juan, Jr.", + "de la Vega, Juan III", + "Juan Velasquez y Garcia", + "Juan Velasquez y Garcia, Jr.", + "Juan Velasquez y Garcia III", + "Velasquez y Garcia, Juan", + "Velasquez y Garcia, Juan, Jr.", + "Velasquez y Garcia, Juan III", + "Dr. Juan de la Vega", + "Dr. Juan de la Vega, Jr.", + "Dr. Juan de la Vega III", + "de la Vega, Dr. Juan", + "de la Vega, Dr. Juan, Jr.", + "de la Vega, Dr. Juan III", + "Dr. Juan Velasquez y Garcia", + "Dr. Juan Velasquez y Garcia, Jr.", + "Dr. Juan Velasquez y Garcia III", + "Velasquez y Garcia, Dr. Juan", + "Velasquez y Garcia, Dr. Juan, Jr.", + "Velasquez y Garcia, Dr. Juan III", + "Juan Q. de la Vega", + "Juan Q. de la Vega, Jr.", + "Juan Q. 
de la Vega III", + "de la Vega, Juan Q.", + "de la Vega, Juan Q., Jr.", + "de la Vega, Juan Q. III", + "Juan Q. Velasquez y Garcia", + "Juan Q. Velasquez y Garcia, Jr.", + "Juan Q. Velasquez y Garcia III", + "Velasquez y Garcia, Juan Q.", + "Velasquez y Garcia, Juan Q., Jr.", + "Velasquez y Garcia, Juan Q. III", + "Dr. Juan Q. de la Vega", + "Dr. Juan Q. de la Vega, Jr.", + "Dr. Juan Q. de la Vega III", + "de la Vega, Dr. Juan Q.", + "de la Vega, Dr. Juan Q., Jr.", + "de la Vega, Dr. Juan Q. III", + "Dr. Juan Q. Velasquez y Garcia", + "Dr. Juan Q. Velasquez y Garcia, Jr.", + "Dr. Juan Q. Velasquez y Garcia III", + "Velasquez y Garcia, Dr. Juan Q.", + "Velasquez y Garcia, Dr. Juan Q., Jr.", + "Velasquez y Garcia, Dr. Juan Q. III", + "Juan Q. Xavier de la Vega", + "Juan Q. Xavier de la Vega, Jr.", + "Juan Q. Xavier de la Vega III", + "de la Vega, Juan Q. Xavier", + "de la Vega, Juan Q. Xavier, Jr.", + "de la Vega, Juan Q. Xavier III", + "Juan Q. Xavier Velasquez y Garcia", + "Juan Q. Xavier Velasquez y Garcia, Jr.", + "Juan Q. Xavier Velasquez y Garcia III", + "Velasquez y Garcia, Juan Q. Xavier", + "Velasquez y Garcia, Juan Q. Xavier, Jr.", + "Velasquez y Garcia, Juan Q. Xavier III", + "Dr. Juan Q. Xavier de la Vega", + "Dr. Juan Q. Xavier de la Vega, Jr.", + "Dr. Juan Q. Xavier de la Vega III", + "de la Vega, Dr. Juan Q. Xavier", + "de la Vega, Dr. Juan Q. Xavier, Jr.", + "de la Vega, Dr. Juan Q. Xavier III", + "Dr. Juan Q. Xavier Velasquez y Garcia", + "Dr. Juan Q. Xavier Velasquez y Garcia, Jr.", + "Dr. Juan Q. Xavier Velasquez y Garcia III", + "Velasquez y Garcia, Dr. Juan Q. Xavier", + "Velasquez y Garcia, Dr. Juan Q. Xavier, Jr.", + "Velasquez y Garcia, Dr. Juan Q. Xavier III", + "John Doe, CLU, CFP, LUTC", + "John P. Doe, CLU, CFP, LUTC", + "Dr. John P. Doe-Ray, CLU, CFP, LUTC", + "Doe-Ray, Dr. John P., CLU, CFP, LUTC", + "Hon. Barrington P. Doe-Ray, Jr.", + "Doe-Ray, Hon. Barrington P. Jr.", + "Doe-Ray, Hon. Barrington P. 
Jr., CFP, LUTC", + "Jose Aznar y Lopez", + "John E Smith", + "John e Smith", + "John and Jane Smith", + "Rev. John A. Kenneth Doe", + "Donovan McNabb-Smith", + "Rev John A. Kenneth Doe", + "Doe, Rev. John A. Jr.", + "Buca di Beppo", + "Lt. Gen. John A. Kenneth Doe, Jr.", + "Doe, Lt. Gen. John A. Kenneth IV", + "Lt. Gen. John A. Kenneth Doe IV", + "Mr. and Mrs. John Smith", + "John Jones (Google Docs)", + "john e jones", + "john e jones, III", + "jones, john e", + "E.T. Smith", + "E.T. Smith, II", + "Smith, E.T., Jr.", + "A.B. Vajpayee", + "Rt. Hon. Paul E. Mary", + "Maid Marion", + "Amy E. Maid", + "Jane Doctor", + "Doctor, Jane E.", + "dr. ben alex johnson III", + "Lord of the Universe and Supreme King of the World Lisa Simpson", + "Benjamin (Ben) Franklin", + "Benjamin \"Ben\" Franklin", + "Brian O'connor", + "Sir Gerald", + "Magistrate Judge John F. Forster, Jr", + "Magistrate Judge Joaquin V.E. Manibusan, Jr", + "Magistrate-Judge Elizabeth Todd Campbell", + "Mag-Judge Harwell G Davis, III", + "Mag. Judge Byron G. Cudmore", + "Chief Judge J. Leon Holmes", + "Chief Judge Sharon Lovelace Blackburn", + "Judge James M. Moody", + "Judge G. Thomas Eisele", + "Judge Callie V. S. Granade", + "Judge C Lynwood Smith, Jr", + "Senior Judge Charles R. Butler, Jr", + "Senior Judge Harold D. Vietor", + "Senior Judge Virgil Pittman", + "Honorable Terry F. Moorer", + "Honorable W. Harold Albritton, III", + "Honorable Judge W. Harold Albritton, III", + "Honorable Judge Terry F. Moorer", + "Honorable Judge Susan Russ Walker", + "Hon. Marian W. Payson", + "Hon. Charles J. Siragusa", + "US Magistrate Judge T Michael Putnam", + "Designated Judge David A. Ezra", + "Sr US District Judge Richard G Kopf", + "U.S. District Judge Marc Thomas Treadwell", + "Dra. Andréia da Silva", + "Srta. Andréia da Silva" + ], + "brute_force": [ + { + "raw": "John Doe", + "first": "John", + "last": "Doe" + }, + { + "raw": "John Doe, Jr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." 
+ }, + { + "raw": "John Doe III", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Doe, John", + "first": "John", + "last": "Doe" + }, + { + "raw": "Doe, John, Jr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Doe, John III", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "John A. Doe", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "John A. Doe, Jr", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr" + }, + { + "raw": "John A. Doe III", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "III" + }, + { + "raw": "Doe, John A.", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "Doe, John A., Jr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." + }, + { + "raw": "Doe, John A., III", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "III" + }, + { + "raw": "John A. Kenneth Doe", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth" + }, + { + "raw": "John A. Kenneth Doe, Jr.", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "Jr." + }, + { + "raw": "John A. Kenneth Doe III", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "III" + }, + { + "raw": "Doe, John. A. Kenneth", + "first": "John.", + "last": "Doe", + "middle": "A. Kenneth" + }, + { + "raw": "Doe, John. A. Kenneth, Jr.", + "first": "John.", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "Jr." + }, + { + "raw": "Doe, John. A. Kenneth III", + "first": "John.", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "III" + }, + { + "raw": "Dr. John Doe", + "first": "John", + "last": "Doe", + "title": "Dr." + }, + { + "raw": "Dr. John Doe, Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Dr. John Doe III", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Doe, Dr. 
John", + "title": "Dr.", + "first": "John", + "last": "Doe" + }, + { + "raw": "Doe, Dr. John, Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Doe, Dr. John III", + "title": "Dr.", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Dr. John A. Doe", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "Dr. John A. Doe, Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." + }, + { + "raw": "Dr. John A. Doe III", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "III" + }, + { + "raw": "Doe, Dr. John A.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A." + }, + { + "raw": "Doe, Dr. John A. Jr.", + "title": "Dr.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "Jr." + }, + { + "raw": "Doe, Dr. John A. III", + "title": "Dr.", + "middle": "A.", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Dr. John A. Kenneth Doe", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe" + }, + { + "raw": "Dr. John A. Kenneth Doe, Jr.", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Al Arnold Gore, Jr.", + "middle": "Arnold", + "first": "Al", + "last": "Gore", + "suffix": "Jr." + }, + { + "raw": "Dr. John A. Kenneth Doe III", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Doe, Dr. John A. Kenneth", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe" + }, + { + "raw": "Doe, Dr. John A. Kenneth Jr.", + "title": "Dr.", + "middle": "A. Kenneth", + "first": "John", + "last": "Doe", + "suffix": "Jr." + }, + { + "raw": "Doe, Dr. John A. Kenneth III", + "title": "Dr.", + "middle": "A. 
Kenneth", + "first": "John", + "last": "Doe", + "suffix": "III" + }, + { + "raw": "Juan de la Vega", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "Juan de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "Juan de la Vega III", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "de la Vega, Juan", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Juan, Jr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Juan III", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "Juan Velasquez y Garcia", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Juan Velasquez y Garcia, Jr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Juan Velasquez y Garcia III", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Juan", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Juan, Jr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Juan III", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Dr. Juan de la Vega", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "Dr. Juan de la Vega, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan de la Vega III", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "de la Vega, Dr. Juan", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Dr. Juan, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Dr. 
Juan III", + "title": "Dr.", + "first": "Juan", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "Dr. Juan Velasquez y Garcia", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Dr. Juan Velasquez y Garcia, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Velasquez y Garcia III", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan, Jr.", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Dr. Juan III", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Juan Q. de la Vega", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega" + }, + { + "raw": "Juan Q. de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "Jr." + }, + { + "raw": "Juan Q. de la Vega III", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega", + "suffix": "III" + }, + { + "raw": "de la Vega, Juan Q.", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Juan Q., Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Juan Q. III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "III" + }, + { + "raw": "Juan Q. Velasquez y Garcia", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Juan Q. Velasquez y Garcia, Jr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Juan Q. 
Velasquez y Garcia III", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Juan Q.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Juan Q., Jr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Juan Q. III", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Dr. Juan Q. de la Vega", + "title": "Dr.", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega" + }, + { + "raw": "Dr. Juan Q. de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "title": "Dr.", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. de la Vega III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "title": "Dr.", + "suffix": "III" + }, + { + "raw": "de la Vega, Dr. Juan Q.", + "first": "Juan", + "middle": "Q.", + "last": "de la Vega", + "title": "Dr." + }, + { + "raw": "de la Vega, Dr. Juan Q., Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "Jr.", + "title": "Dr." + }, + { + "raw": "de la Vega, Dr. Juan Q. III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q.", + "suffix": "III", + "title": "Dr." + }, + { + "raw": "Dr. Juan Q. Velasquez y Garcia", + "title": "Dr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Dr. Juan Q. Velasquez y Garcia, Jr.", + "title": "Dr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. Velasquez y Garcia III", + "middle": "Q.", + "title": "Dr.", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q.", + "title": "Dr.", + "middle": "Q.", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Dr. 
Juan Q., Jr.", + "middle": "Q.", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. III", + "middle": "Q.", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Juan Q. Xavier de la Vega", + "first": "Juan", + "middle": "Q. Xavier", + "last": "de la Vega" + }, + { + "raw": "Juan Q. Xavier de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "Juan Q. Xavier de la Vega III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "de la Vega, Juan Q. Xavier", + "first": "Juan", + "middle": "Q. Xavier", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Juan Q. Xavier, Jr.", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Juan Q. Xavier III", + "first": "Juan", + "last": "de la Vega", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "Dr. Juan Q. Xavier de la Vega", + "first": "Juan", + "middle": "Q. Xavier", + "title": "Dr.", + "last": "de la Vega" + }, + { + "raw": "Dr. Juan Q. Xavier de la Vega, Jr.", + "first": "Juan", + "last": "de la Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. Xavier de la Vega III", + "first": "Juan", + "last": "de la Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "III" + }, + { + "raw": "de la Vega, Dr. Juan Q. Xavier", + "first": "Juan", + "title": "Dr.", + "middle": "Q. Xavier", + "last": "de la Vega" + }, + { + "raw": "de la Vega, Dr. Juan Q. Xavier, Jr.", + "first": "Juan", + "last": "de la Vega", + "title": "Dr.", + "middle": "Q. Xavier", + "suffix": "Jr." + }, + { + "raw": "de la Vega, Dr. Juan Q. Xavier III", + "first": "Juan", + "title": "Dr.", + "last": "de la Vega", + "middle": "Q. 
Xavier", + "suffix": "III" + }, + { + "raw": "Juan Q. Xavier Velasquez y Garcia", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Juan Q. Xavier Velasquez y Garcia, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Juan Q. Xavier Velasquez y Garcia III", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Juan Q. Xavier", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Juan Q. Xavier, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Juan Q. Xavier III", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Dr. Juan Q. Xavier Velasquez y Garcia", + "title": "Dr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Dr. Juan Q. Xavier Velasquez y Garcia, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Dr. Juan Q. Xavier Velasquez y Garcia III", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. Xavier", + "title": "Dr.", + "middle": "Q. Xavier", + "first": "Juan", + "last": "Velasquez y Garcia" + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. Xavier, Jr.", + "middle": "Q. Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "Jr." + }, + { + "raw": "Velasquez y Garcia, Dr. Juan Q. Xavier III", + "middle": "Q. 
Xavier", + "first": "Juan", + "title": "Dr.", + "last": "Velasquez y Garcia", + "suffix": "III" + }, + { + "raw": "John Doe, CLU, CFP, LUTC", + "first": "John", + "last": "Doe", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "John P. Doe, CLU, CFP, LUTC", + "first": "John", + "middle": "P.", + "last": "Doe", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "Dr. John P. Doe-Ray, CLU, CFP, LUTC", + "first": "John", + "middle": "P.", + "last": "Doe-Ray", + "title": "Dr.", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "Doe-Ray, Dr. John P., CLU, CFP, LUTC", + "title": "Dr.", + "middle": "P.", + "first": "John", + "last": "Doe-Ray", + "suffix": "CLU, CFP, LUTC" + }, + { + "raw": "Hon. Barrington P. Doe-Ray, Jr.", + "title": "Hon.", + "middle": "P.", + "first": "Barrington", + "last": "Doe-Ray", + "suffix": "Jr." + }, + { + "raw": "Doe-Ray, Hon. Barrington P. Jr., CFP, LUTC", + "title": "Hon.", + "middle": "P.", + "first": "Barrington", + "last": "Doe-Ray", + "suffix": "Jr., CFP, LUTC" + }, + { + "raw": "Rt. Hon. Paul E. Mary", + "title": "Rt. Hon.", + "first": "Paul", + "middle": "E.", + "last": "Mary" + }, + { + "raw": "Lord God Almighty", + "title": "Lord", + "first": "God", + "last": "Almighty" + } + ] +} \ No newline at end of file diff --git a/pytest/names/title.json b/pytest/names/title.json new file mode 100644 index 0000000..357524d --- /dev/null +++ b/pytest/names/title.json @@ -0,0 +1,221 @@ +[ + { + "id": "test_last_name_also_prefix", + "_note": "http://code.google.com/p/python-nameparser/issues/detail?id=13", + "raw": "Jane Doctor", + "first": "Jane", + "last": "Doctor" + }, + { + "id": "test_last_name_is_also_title", + "raw": "Amy E Maid", + "first": "Amy", + "middle": "E", + "last": "Maid" + }, + { + "id": "test_last_name_is_also_title_no_comma", + "raw": "Dr. Martin Luther King Jr.", + "title": "Dr.", + "first": "Martin", + "middle": "Luther", + "last": "King", + "suffix": "Jr." 
+ }, + { + "id": "test_last_name_is_also_title_with_comma", + "raw": "Duke Martin Luther King, Jr.", + "title": "Duke", + "first": "Martin", + "middle": "Luther", + "last": "King", + "suffix": "Jr." + }, + { + "id": "test_last_name_is_also_title3", + "raw": "John King", + "first": "John", + "last": "King" + }, + { + "id": "test_title_with_conjunction", + "raw": "Secretary of State Hillary Clinton", + "title": "Secretary of State", + "first": "Hillary", + "last": "Clinton" + }, + { + "id": "test_compound_title_with_conjunction", + "raw": "Cardinal Secretary of State Hillary Clinton", + "title": "Cardinal Secretary of State", + "first": "Hillary", + "last": "Clinton" + }, + { + "id": "test_title_is_title", + "raw": "Coach", + "title": "Coach" + }, + { + "id": "test_conflict_with_chained_title_first_name_initial", + "raw": "U. S. Grant", + "first": "U.", + "middle": "S.", + "last": "Grant" + }, + { + "id": "test_chained_title_first_name_initial_with_no_period", + "raw": "US Magistrate Judge T Michael Putnam", + "title": "US Magistrate Judge", + "first": "T", + "middle": "Michael", + "last": "Putnam" + }, + { + "id": "test_chained_hyphenated_title", + "raw": "US Magistrate-Judge Elizabeth E Campbell", + "title": "US Magistrate-Judge", + "first": "Elizabeth", + "middle": "E", + "last": "Campbell" + }, + { + "id": "test_chained_hyphenated_title_with_comma_suffix", + "raw": "Mag-Judge Harwell G Davis, III", + "title": "Mag-Judge", + "first": "Harwell", + "middle": "G", + "last": "Davis", + "suffix": "III" + }, + { + "id": "test_title_starts_with_conjunction", + "raw": "The Rt Hon John Jones", + "title": "The Rt Hon", + "first": "John", + "last": "Jones" + }, + { + "id": "test_conjunction_before_title", + "raw": "The Lord of the Universe", + "title": "The Lord of the Universe" + }, + { + "id": "test_double_conjunction_on_title", + "raw": "Lord of the Universe", + "title": "Lord of the Universe" + }, + { + "id": "test_triple_conjunction_on_title", + "raw": "Lord and of the 
Universe", + "title": "Lord and of the Universe" + }, + { + "id": "test_multiple_conjunctions_on_multiple_titles", + "raw": "Lord of the Universe and Associate Supreme Queen of the World Lisa Simpson", + "title": "Lord of the Universe and Associate Supreme Queen of the World", + "first": "Lisa", + "last": "Simpson" + }, + { + "id": "test_title_with_last_initial_is_suffix", + "raw": "King John V.", + "title": "King", + "first": "John", + "last": "V." + }, + { + "id": "test_initials_also_suffix", + "raw": "Smith, J.R.", + "first": "J.R.", + "last": "Smith" + }, + { + "id": "test_two_title_parts_separated_by_periods", + "raw": "Lt.Gen. John A. Kenneth Doe IV", + "title": "Lt.Gen.", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "IV" + }, + { + "id": "test_two_part_title", + "raw": "Lt. Gen. John A. Kenneth Doe IV", + "title": "Lt. Gen.", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "IV" + }, + { + "id": "test_two_part_title_with_lastname_comma", + "raw": "Doe, Lt. Gen. John A. Kenneth IV", + "title": "Lt. Gen.", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "IV" + }, + { + "id": "test_two_part_title_with_suffix_comma", + "raw": "Lt. Gen. John A. Kenneth Doe, Jr.", + "title": "Lt. Gen.", + "first": "John", + "last": "Doe", + "middle": "A. Kenneth", + "suffix": "Jr." + }, + { + "id": "test_possible_conflict_with_middle_initial_that_could_be_suffix", + "raw": "Doe, Rev. John V, Jr.", + "title": "Rev.", + "first": "John", + "last": "Doe", + "middle": "V", + "suffix": "Jr." + }, + { + "id": "test_possible_conflict_with_suffix_that_could_be_initial", + "raw": "Doe, Rev. John A., V, Jr.", + "title": "Rev.", + "first": "John", + "last": "Doe", + "middle": "A.", + "suffix": "V, Jr." 
+ }, + { + "id": "test_ben_as_first_name", + "raw": "Ben Johnson", + "first": "Ben", + "last": "Johnson" + }, + { + "id": "test_ben_as_first_name_with_middle_name", + "raw": "Ben Alex Johnson", + "first": "Ben", + "middle": "Alex", + "last": "Johnson" + }, + { + "id": "test_ben_as_middle_name", + "raw": "Alex Ben Johnson", + "first": "Alex", + "middle": "Ben", + "last": "Johnson" + }, + { + "id": "test_title_with_periods", + "raw": "Lt.Gov. John Doe", + "title": "Lt.Gov.", + "first": "John", + "last": "Doe" + }, + { + "id": "test_title_with_periods_lastname_comma", + "raw": "Doe, Lt.Gov. John", + "title": "Lt.Gov.", + "first": "John", + "last": "Doe" + } +] \ No newline at end of file diff --git a/pytest/names_test.py b/pytest/names_test.py new file mode 100644 index 0000000..c12e884 --- /dev/null +++ b/pytest/names_test.py @@ -0,0 +1,711 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function +from __future__ import unicode_literals +from io import open +import json +import os +import sys + +import pytest + +from nameparser import HumanName +from nameparser.config import CONSTANTS, Constants +from nameparser.util import u + +TEST_DATA_DIRECTORY = os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "names" +) +print(TEST_DATA_DIRECTORY) + + +def load_bank(category): + filename = category + ".json" + test_bank_file = os.path.join(TEST_DATA_DIRECTORY, filename) + + with open(test_bank_file, "r", encoding="utf8") as infile: + # with io.open(test_bank_file, "r") as infile: + test_bank = json.load(infile, encoding="utf-8") + print("Loading {} cases for {} from {}.".format(len(test_bank), category, filename)) + return test_bank + + +def dict_entry_test(dict_entry): + hn = HumanName(dict_entry["raw"]) + for attr in hn._members: + actual = getattr(hn, attr) + expected = dict_entry.get(attr, CONSTANTS.empty_attribute_default) + assert actual == expected + + +def make_ids(entry): + return entry.get("id") or entry.get("raw") + + +class 
TestCoreFunctionality: + @pytest.mark.parametrize( + "entry", + [ + { + "id": "test_utf8", + "raw": "de la Véña, Jüan", + "first": "Jüan", + "last": "de la Véña", + }, + { + "id": "test_escaped_utf8_bytes", + "raw": b"B\xc3\xb6ck, Gerald", + "first": "Gerald", + "last": "Böck", + }, + { + "id": "test_conjunction_names", + "raw": "johnny y", + "first": "johnny", + "last": "y", + }, + { + "id": "test_prefixed_names", + "raw": "vai la", + "first": "vai", + "last": "la", + }, + ], + ids=make_ids, + ) + def test_basics(self, entry): + dict_entry_test(entry) + + def test_blank(self): + # This can't be parametrized in the same way as test_basics, because + # CONSTANTS.empty_attribute_default is itself parametrized at module scope by the autouse fixture in conftest.py + dict_entry_test( + { + "id": "test_blank_name", + "raw": "", + "first": CONSTANTS.empty_attribute_default, + "last": CONSTANTS.empty_attribute_default, + } + ) + + def test_string_output(self,): + hn = HumanName("de la Véña, Jüan") + print(hn) + print(repr(hn)) + + @pytest.mark.parametrize( + "raw, length", [("Doe-Ray, Dr. John P., CLU, CFP, LUTC", 5), ("John Doe", 2)] + ) + def test_len(self, raw, length): + assert len(HumanName(raw)) == length + + def test_comparison(self): + hn1 = HumanName("Doe-Ray, Dr. John P., CLU, CFP, LUTC") + hn2 = HumanName("Dr. John P. Doe-Ray, CLU, CFP, LUTC") + assert hn1 == hn2 + assert hn1 is not hn2 + assert hn1 == "Dr. John P. Doe-Ray CLU, CFP, LUTC" + hn1 = HumanName("Doe, Dr. John P., CLU, CFP, LUTC") + hn2 = HumanName("Dr. John P. Doe-Ray, CLU, CFP, LUTC") + assert hn1 != hn2 + assert hn1 != 0 + assert hn1 != "test" + assert hn1 != ["test"] + assert hn1 != {"test": hn2} + + def test_assignment_to_full_name(self): + hn = HumanName("John A. Kenneth Doe, Jr.") + assert hn.first == "John" + assert hn.last == "Doe" + assert hn.middle == "A. Kenneth" + assert hn.suffix == "Jr." 
+ hn.full_name = "Juan Velasquez y Garcia III" + assert hn.first == "Juan" + assert hn.last == "Velasquez y Garcia" + assert hn.suffix == "III" + + def test_get_full_name_attribute_references_internal_lists(self): + hn = HumanName("John Williams") + hn.first_list = ["Larry"] + assert hn.full_name == "Larry Williams" # compare; "assert x, y" only checks that x is truthy + + def test_assignment_to_attribute(self): + hn = HumanName("John A. Kenneth Doe, Jr.") + hn.last = "de la Vega" + assert hn.last == "de la Vega" + hn.title = "test" + assert hn.title == "test" + hn.first = "test" + assert hn.first == "test" + hn.middle = "test" + assert hn.middle == "test" + hn.suffix = "test" + assert hn.suffix == "test" + with pytest.raises(TypeError): + hn.suffix = [["test"]] + with pytest.raises(TypeError): + hn.suffix = {"test": "test"} + + def test_assign_list_to_attribute(self): + hn = HumanName("John A. Kenneth Doe, Jr.") + hn.title = ["test1", "test2"] + assert hn.title == "test1 test2" + hn.first = ["test3", "test4"] + assert hn.first == "test3 test4" + hn.middle = ["test5", "test6", "test7"] + assert hn.middle == "test5 test6 test7" + hn.last = ["test8", "test9", "test10"] + assert hn.last == "test8 test9 test10" + hn.suffix = ["test"] + assert hn.suffix == "test" + + def test_comparison_case_insensitive(self): + hn1 = HumanName("Doe-Ray, Dr. John P., CLU, CFP, LUTC") + hn2 = HumanName("dr. john p. doe-Ray, CLU, CFP, LUTC") + assert hn1 == hn2 + assert hn1 is not hn2 + assert hn1 == "Dr. John P. Doe-ray clu, CFP, LUTC" + + def test_slice(self): + hn = HumanName("Doe-Ray, Dr. John P., CLU, CFP, LUTC") + assert list(hn) == ["Dr.", "John", "P.", "Doe-Ray", "CLU, CFP, LUTC"] + assert hn[1:] == [ + "John", + "P.", + "Doe-Ray", + "CLU, CFP, LUTC", + hn.C.empty_attribute_default, + ] + assert hn[1:-2] == ["John", "P.", "Doe-Ray"] + + def test_getitem(self): + hn = HumanName("Dr. John A. Kenneth Doe, Jr.") + assert hn["title"] == "Dr." 
+ assert hn["first"] == "John" + assert hn["last"] == "Doe" + assert hn["middle"] == "A. 
Kenneth" + assert hn["suffix"] == "Jr." + + def test_setitem(self): + hn = HumanName("Dr. John A. Kenneth Doe, Jr.") + hn["title"] = "test" + assert hn["title"] == "test" + hn["last"] = ["test", "test2"] + assert hn["last"] == "test test2" + with pytest.raises(TypeError): + hn["suffix"] = [["test"]] + with pytest.raises(TypeError): + hn["suffix"] = {"test": "test"} + + def test_surnames_list_attribute(self): + hn = HumanName("John Edgar Casey Williams III") + assert hn.surnames_list == ["Edgar", "Casey", "Williams"] + + def test_surnames_attribute(self): + hn = HumanName("John Edgar Casey Williams III") + assert hn.surnames == "Edgar Casey Williams" + + +class TestPickle: + + try: + import dill + + no_dill = False + except ImportError: + no_dill = True + + @pytest.mark.skipif(no_dill, reason="requires python-dill module to test pickling") + def test_config_pickle(self): + constants = Constants() + assert self.dill.pickles(constants) # pickles() returns a bool; it must be asserted to test anything + + @pytest.mark.skipif(no_dill, reason="requires python-dill module to test pickling") + def test_name_instance_pickle(self): + hn = HumanName("Title First Middle Middle Last, Jr.") + assert self.dill.pickles(hn) + + +class TestHumanNameBruteForce: + @pytest.mark.parametrize("entry", load_bank("brute_force"), ids=make_ids) + def test_brute(self, entry): + dict_entry_test(entry) + + +class TestFirstNameHandling: + @pytest.mark.parametrize("entry", load_bank("first_name"), ids=make_ids) + def test_json_first_name(self, entry): + dict_entry_test(entry) + + @pytest.mark.xfail( + reason="# TODO: Seems 'Andrews, M.D.', Andrews should be treated as a last name " + "but other suffixes like 'George Jr.' should be first names. " + "Might be related to https://github.com/derek73/python-nameparser/issues/2" + ) + def test_assume_suffix_title_and_one_other_name_is_last_name(self): + hn = HumanName("Andrews, M.D.") + assert hn.suffix == "M.D." 
+ assert hn.last == "Andrews" + + @pytest.mark.xfail + def test_first_name_is_prefix_if_three_parts(self): + """Not sure how to fix this without breaking Mr and Mrs""" + hn = HumanName("Mr. Van Nguyen") + assert hn.first == "Van" + assert hn.last == "Nguyen" + + +class TestHumanNameConjunction: + @pytest.mark.parametrize("entry", load_bank("conjunction"), ids=make_ids) + def test_json_conjunction(self, entry): + dict_entry_test(entry) + + @pytest.mark.xfail + def test_two_initials_conflict_with_conjunction(self): + # Supporting this seems to screw up titles with periods in them like M.B.A. + hn = HumanName("E.T. Smith") + assert hn.first == "E." + assert hn.middle == "T." + assert hn.last == "Smith" + + @pytest.mark.xfail + def test_conjunction_in_an_address_with_a_first_name_title(self): + hn = HumanName("Her Majesty Queen Elizabeth") + assert hn.title == "Her Majesty Queen" + # if you want to be technical, Queen is in FIRST_NAME_TITLES + assert hn.first == "Elizabeth" + + +class TestConstantsCustomization: + def test_add_title(self): + hn = HumanName("Te Awanui-a-Rangi Black", constants=None) + start_len = len(hn.C.titles) + assert start_len > 0 + hn.C.titles.add("te") + assert start_len + 1 == len(hn.C.titles) + hn.parse_full_name() + assert hn.title == "Te" + assert hn.first == "Awanui-a-Rangi" + assert hn.last == "Black" + + def test_remove_title(self): + hn = HumanName("Hon Solo", constants=None) + start_len = len(hn.C.titles) + assert start_len > 0 + hn.C.titles.remove("hon") + assert start_len - 1 == len(hn.C.titles) + hn.parse_full_name() + assert hn.first == "Hon" + assert hn.last == "Solo" + + def test_add_multiple_arguments(self): + hn = HumanName("Assoc Dean of Chemistry Robert Johns", constants=None) + hn.C.titles.add("dean", "Chemistry") + hn.parse_full_name() + assert hn.title == "Assoc Dean of Chemistry" + assert hn.first == "Robert" + assert hn.last == "Johns" + + def test_instances_can_have_own_constants(self): + hn = HumanName("", None) + hn2 = 
HumanName("") + hn.C.titles.remove("hon") + assert "hon" not in hn.C.titles + assert hn.has_own_config + assert "hon" in hn2.C.titles + assert not hn2.has_own_config + + def test_can_change_global_constants(self): + hn = HumanName("") + hn2 = HumanName("") + hn.C.titles.remove("hon") + assert "hon" not in hn.C.titles + assert "hon" not in hn2.C.titles + assert not hn.has_own_config + assert not hn2.has_own_config + # clean up so we don't mess up other tests + hn.C.titles.add("hon") + + def test_remove_multiple_arguments(self): + hn = HumanName("Ms Hon Solo", constants=None) + hn.C.titles.remove("hon", "ms") + hn.parse_full_name() + assert hn.first == "Ms" + assert hn.middle == "Hon" + assert hn.last == "Solo" + + def test_chain_multiple_arguments(self): + hn = HumanName("Dean Ms Hon Solo", constants=None) + hn.C.titles.remove("hon", "ms").add("dean") + hn.parse_full_name() + assert hn.title == "Dean" + assert hn.first == "Ms" + assert hn.middle == "Hon" + assert hn.last == "Solo" + + def test_empty_attribute_default(self): + from nameparser.config import CONSTANTS + + _orig = CONSTANTS.empty_attribute_default + CONSTANTS.empty_attribute_default = None + hn = HumanName("") + assert hn.title is None + assert hn.first is None + assert hn.middle is None + assert hn.last is None + assert hn.suffix is None + assert hn.nickname is None + CONSTANTS.empty_attribute_default = _orig + + def test_empty_attribute_on_instance(self): + hn = HumanName("", None) + hn.C.empty_attribute_default = None + assert hn.title is None + assert hn.first is None + assert hn.middle is None + assert hn.last is None + assert hn.suffix is None + assert hn.nickname is None + + def test_none_empty_attribute_string_formatting(self): + hn = HumanName("", None) + hn.C.empty_attribute_default = None + assert str(hn) == "" + + def test_add_constant_with_explicit_encoding(self): + c = Constants() + c.titles.add_with_encoding(b"b\351ck", encoding="latin_1") + assert "béck" in c.titles + + +class 
TestNickname: + @pytest.mark.parametrize("entry", load_bank("nickname"), ids=make_ids) + def test_json_nickname(self, entry): + dict_entry_test(entry) + + # http://code.google.com/p/python-nameparser/issues/detail?id=17 + def test_parenthesis_are_removed_from_name(self): + hn = HumanName("John Jones (Unknown)") + assert hn.first == "John" + assert hn.last == "Jones" + assert hn.nickname != CONSTANTS.empty_attribute_default + + # http://code.google.com/p/python-nameparser/issues/detail?id=17 + # not testing nicknames because we don't actually care about Google Docs here + def test_duplicate_parenthesis_are_removed_from_name(self): + hn = HumanName("John Jones (Google Docs), Jr. (Unknown)") + assert hn.first == "John" + assert hn.last == "Jones" + assert hn.suffix == "Jr." + assert hn.nickname != CONSTANTS.empty_attribute_default + + @pytest.mark.xfail + def test_nickname_and_last_name_with_title(self): + hn = HumanName('Senator "Rick" Edmonds') + assert hn.title == "Senator" + assert hn.first == CONSTANTS.empty_attribute_default + assert hn.last == "Edmonds" + assert hn.nickname == "Rick" + + +class TestPrefixes: + @pytest.mark.parametrize("entry", load_bank("prefix"), ids=make_ids) + def test_json_prefix(self, entry): + dict_entry_test(entry) + + +class TestSuffixes: + @pytest.mark.parametrize("entry", load_bank("suffix"), ids=make_ids) + def test_json_suffix(self, entry): + dict_entry_test(entry) + + @pytest.mark.xfail( + reason="TODO: handle conjunctions in last names" + " followed by first names clashing with suffixes" + ) + def test_potential_suffix_that_is_also_first_name_comma_with_conjunction(self): + hn = HumanName("De la Vina, Bart") + assert hn.first == "Bart" + assert hn.last == "De la Vina" + + @pytest.mark.xfail(reason="https://github.com/derek73/python-nameparser/issues/27") + def test_king(self): + hn = HumanName("Dr King Jr") + assert hn.title == "Dr" + assert hn.last == "King" + assert hn.suffix == "Jr" + + +class TestTitle: + 
@pytest.mark.parametrize("entry", load_bank("title"), ids=make_ids) + def test_json_title(self, entry): + dict_entry_test(entry) + + @pytest.mark.xfail(reason="TODO: fix handling of U.S.") + def test_chained_title_first_name_title_is_initials(self): + hn = HumanName("U.S. District Judge Marc Thomas Treadwell") + assert hn.title == "U.S. District Judge" + assert hn.first == "Marc" + assert hn.middle == "Thomas" + assert hn.last == "Treadwell" + + @pytest.mark.xfail + def test_title_multiple_titles_with_apostrophe_s(self): + hn = HumanName("The Right Hon. the President of the Queen's Bench Division") + assert hn.title == "The Right Hon. the President of the Queen's Bench Division" + + @pytest.mark.xfail( + reason="'ben' was removed from PREFIXES in v0.2.5; " + "re-enable this test if we decide to support 'ben' as a prefix" + ) + def test_ben_as_conjunction(self): + hn = HumanName("Ahmad ben Husain") + assert hn.first == "Ahmad" + assert hn.last == "ben Husain" + + +class TestHumanNameCapitalization: + @pytest.mark.parametrize("entry", load_bank("capitalization"), ids=make_ids) + def test_json_capitalization(self, entry): + hn = HumanName(entry["raw"]) + hn.capitalize() + if sys.version_info.major < 3: + assert u(hn) == entry["string"] + else: + assert str(hn) == entry["string"] + + @pytest.mark.parametrize( + "name, is_forced", + [ + ("Shirley Maclaine", {True: "Shirley MacLaine", False: "Shirley Maclaine"}), + ("Baron Mcyolo", {True: "Baron McYolo", False: "Baron Mcyolo"}), + ], + ) + @pytest.mark.parametrize("force", [True, False]) + def test_no_capitalization_change_unless_forced(self, name, is_forced, force): + hn = HumanName(name) + hn.capitalize(force=force) + assert str(hn) == is_forced[force] + + @pytest.mark.xfail( + reason="FIXME: this test does not pass due to a known issue " + "http://code.google.com/p/python-nameparser/issues/detail?id=22" + ) + def test_capitalization_exception_for_already_capitalized_III_KNOWN_FAILURE(self): + hn = 
HumanName("juan garcia III") + hn.capitalize() + assert str(hn) == "Juan Garcia III" + + +class TestHumanNameOutputFormat: + def test_formatting_init_argument(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)", string_format="TEST1") + assert u(hn) == "TEST1" + + def test_formatting_constants_attribute(self): + from nameparser.config import CONSTANTS + + _orig = CONSTANTS.string_format + CONSTANTS.string_format = "TEST2" + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + assert u(hn) == "TEST2" + CONSTANTS.string_format = _orig + + def test_quote_nickname_formating(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'" + assert u(hn) == "Rev John A. Kenneth Doe III 'Kenny'" + hn.string_format = "{last}, {title} {first} {middle}, {suffix} '{nickname}'" + assert u(hn) == "Doe, Rev John A. Kenneth, III 'Kenny'" + + def test_formating_removing_keys_from_format_string(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'" + assert u(hn) == "Rev John A. Kenneth Doe III 'Kenny'" + hn.string_format = "{last}, {title} {first} {middle}, {suffix}" + assert u(hn) == "Doe, Rev John A. Kenneth, III" + hn.string_format = "{last}, {title} {first} {middle}" + assert u(hn) == "Doe, Rev John A. Kenneth" + hn.string_format = "{last}, {first} {middle}" + assert u(hn) == "Doe, John A. Kenneth" + hn.string_format = "{last}, {first}" + assert u(hn) == "Doe, John" + hn.string_format = "{first} {last}" + assert u(hn) == "John Doe" + + def test_formating_removing_pieces_from_name_buckets(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'" + assert u(hn) == "Rev John A. Kenneth Doe III 'Kenny'" + hn.string_format = "{title} {first} {middle} {last} {suffix}" + assert u(hn) == "Rev John A. 
Kenneth Doe III" + hn.middle = "" + assert u(hn) == "Rev John Doe III" + hn.suffix = "" + assert u(hn) == "Rev John Doe" + hn.title = "" + assert u(hn) == "John Doe" + + def test_formating_of_nicknames_with_parenthesis(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = "{title} {first} {middle} {last} {suffix} ({nickname})" + assert u(hn) == "Rev John A. Kenneth Doe III (Kenny)" + hn.nickname = "" + assert u(hn) == "Rev John A. Kenneth Doe III" + + def test_formating_of_nicknames_with_single_quotes(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'" + assert u(hn) == "Rev John A. Kenneth Doe III 'Kenny'" + hn.nickname = "" + assert u(hn) == "Rev John A. Kenneth Doe III" + + def test_formating_of_nicknames_with_double_quotes(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = '{title} {first} {middle} {last} {suffix} "{nickname}"' + assert u(hn) == 'Rev John A. Kenneth Doe III "Kenny"' + hn.nickname = "" + assert u(hn) == "Rev John A. Kenneth Doe III" + + def test_formating_of_nicknames_in_middle(self): + hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") + hn.string_format = "{title} {first} ({nickname}) {middle} {last} {suffix}" + assert u(hn) == "Rev John (Kenny) A. Kenneth Doe III" + hn.nickname = "" + assert u(hn) == "Rev John A. 
Kenneth Doe III" + + def test_remove_emojis(self): + hn = HumanName("Sam Smith 😊") + assert hn.first == "Sam" + assert hn.last == "Smith" + assert u(hn) == "Sam Smith" + + def test_keep_non_emojis(self): + hn = HumanName("∫≜⩕ Smith 😊") + assert hn.first == "∫≜⩕" + assert hn.last == "Smith" + assert u(hn) == "∫≜⩕ Smith" + + def test_keep_emojis(self): + constants = Constants() + constants.regexes.emoji = False + hn = HumanName("∫≜⩕ Smith😊", constants) + assert hn.first == "∫≜⩕" + assert hn.last == "Smith😊" + assert u(hn) == "∫≜⩕ Smith😊" + # test cleanup + + +class TestHumanNameVariations: + """Test automated variations of names in TEST_NAMES. + + Helps test that the 3 code trees work the same""" + + @pytest.mark.parametrize("name", load_bank("bare_names")) + def test_json_variations(self, name): + self.run_variations(name) + + def run_variations(self, name): + """ Run several variations + + This is a separate function so that individual non-parametrized tests can be + added if desired. + """ + hn = HumanName(name) + if len(hn.suffix_list) > 1: + hn = HumanName( + "{title} {first} {middle} {last} {suffix}".format(**hn.as_dict()).split( + "," + )[0] + ) + # format strings below require empty string + hn.C.empty_attribute_default = "" + hn_dict = hn.as_dict() + nocomma = HumanName( + "{title} {first} {middle} {last} {suffix}".format(**hn_dict) + ) + lastnamecomma = HumanName( + "{last}, {title} {first} {middle} {suffix}".format(**hn_dict) + ) + if hn.suffix: + suffixcomma = HumanName( + "{title} {first} {middle} {last}, {suffix}".format(**hn_dict) + ) + if hn.nickname: + nocomma = HumanName( + "{title} {first} {middle} {last} {suffix} ({nickname})".format( + **hn_dict + ) + ) + lastnamecomma = HumanName( + "{last}, {title} {first} {middle} {suffix} ({nickname})".format( + **hn_dict + ) + ) + if hn.suffix: + suffixcomma = HumanName( + "{title} {first} {middle} {last}, {suffix} ({nickname})".format( + **hn_dict + ) + ) + for attr in hn._members: + assert getattr(hn, 
attr) == getattr(nocomma, attr) + assert getattr(hn, attr) == getattr(lastnamecomma, attr) + if hn.suffix: + assert getattr(hn, attr) == getattr(suffixcomma, attr) + + +class TestMaidenName: + + no_maiden_names = getattr(HumanName(), "maiden", None) is None + + @pytest.mark.skipif(no_maiden_names, reason="Maiden names not implemented.") + def test_parenthesis_and_quotes_together(self): + hn = HumanName("Jennifer 'Jen' Jones (Duff)") + assert hn.first == "Jennifer" + assert hn.last == "Jones" + assert hn.nickname == "Jen" + assert hn.maiden == "Duff" + + @pytest.mark.skipif(no_maiden_names, reason="Maiden names not implemented.") + def test_maiden_name_with_nee(self): + # https://en.wiktionary.org/wiki/née + hn = HumanName("Mary Toogood nee Johnson") + assert hn.first == "Mary" + assert hn.last == "Toogood" + assert hn.maiden == "Johnson" + + @pytest.mark.skipif(no_maiden_names, reason="Maiden names not implemented.") + def test_maiden_name_with_accented_nee(self): + # https://en.wiktionary.org/wiki/née + hn = HumanName("Mary Toogood née Johnson") + assert hn.first == "Mary" + assert hn.last == "Toogood" + assert hn.maiden == "Johnson" + + @pytest.mark.skipif(no_maiden_names, reason="Maiden names not implemented.") + def test_maiden_name_with_nee_and_comma(self): + # https://en.wiktionary.org/wiki/née + hn = HumanName("Mary Toogood, née Johnson") + assert hn.first == "Mary" + assert hn.last == "Toogood" + assert hn.maiden == "Johnson" + + @pytest.mark.skipif(no_maiden_names, reason="Maiden names not implemented.") + def test_maiden_name_with_nee_with_parenthesis(self): + hn = HumanName("Mary Toogood (nee Johnson)") + assert hn.first == "Mary" + assert hn.last == "Toogood" + assert hn.maiden == "Johnson" + + @pytest.mark.skipif(no_maiden_names, reason="Maiden names not implemented.") + def test_maiden_name_with_parenthesis(self): + hn = HumanName("Mary Toogood (Johnson)") + assert hn.first == "Mary" + assert hn.last == "Toogood" + assert hn.maiden == "Johnson" + + 
+if __name__ == "__main__": + # Pass through any/all arguments to pytest + pytest.main(sys.argv) diff --git a/pytest/pytest.ini b/pytest/pytest.ini new file mode 100644 index 0000000..b0e5a94 --- /dev/null +++ b/pytest/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +filterwarnings = + ignore::DeprecationWarning \ No newline at end of file