diff --git a/nssp/delphi_nssp/constants.py b/nssp/delphi_nssp/constants.py index 0abb68c29..ddd2e74b8 100644 --- a/nssp/delphi_nssp/constants.py +++ b/nssp/delphi_nssp/constants.py @@ -37,6 +37,6 @@ "timestamp": "datetime64[ns]", "geography": str, "county": str, - "fips": int, + "fips": str, } ) diff --git a/nssp/delphi_nssp/pull.py b/nssp/delphi_nssp/pull.py index 5769cca82..ece94fab4 100644 --- a/nssp/delphi_nssp/pull.py +++ b/nssp/delphi_nssp/pull.py @@ -67,5 +67,8 @@ def pull_nssp_data(socrata_token: str): except KeyError as exc: raise ValueError(warn_string(df_ervisits, TYPE_DICT)) from exc + # Format county fips to all be 5 digits with leading zeros + df_ervisits["fips"] = df_ervisits["fips"].apply(lambda x: str(x).zfill(5) if str(x) != "0" else "0") + keep_columns = ["timestamp", "geography", "county", "fips"] return df_ervisits[SIGNALS + keep_columns] diff --git a/nssp/tests/test_data/page.txt b/nssp/tests/test_data/page.txt index af5f25922..34dfa71a8 100644 --- a/nssp/tests/test_data/page.txt +++ b/nssp/tests/test_data/page.txt @@ -61,5 +61,26 @@ "hsa_nci_id": "All", "fips": "0", "trend_source": "United States" + }, + { + "week_end": "2023-05-13T00:00:00.000", + "geography": "Colorado", + "county": "Jefferson", + "percent_visits_combined": "0.84", + "percent_visits_covid": "0.59", + "percent_visits_influenza": "0.23", + "percent_visits_rsv": "0.03", + "percent_visits_smoothed": "0.83", + "percent_visits_smoothed_covid": "0.62", + "percent_visits_smoothed_1": "0.18", + "percent_visits_smoothed_rsv": "0.02", + "ed_trends_covid": "Decreasing", + "ed_trends_influenza": "No Change", + "ed_trends_rsv": "Decreasing", + "hsa": "Denver (Denver), CO - Jefferson, CO", + "hsa_counties": "Adams, Arapahoe, Clear Creek, Denver, Douglas, Elbert, Gilpin, Grand, Jefferson, Park, Summit", + "hsa_nci_id": "688", + "fips": "8059", + "trend_source": "HSA" } ] diff --git a/nssp/tests/test_pull.py b/nssp/tests/test_pull.py index cdc85a908..b356341f6 100644 --- a/nssp/tests/test_pull.py +++ b/nssp/tests/test_pull.py @@ -49,6 +49,7 @@ def test_pull_nssp_data(self, mock_socrata): assert result["geography"].notnull().all(), "geography has rogue NaN" assert result["county"].notnull().all(), "county has rogue NaN" assert result["fips"].notnull().all(), "fips has rogue NaN" + assert result["fips"].apply(lambda x: isinstance(x, str) and len(x) != 4).all(), "fips formatting should always be 5 digits; include leading zeros if aplicable" # Check for each signal in SIGNALS for signal in SIGNALS: