remove unused code and table definition

cchuong · cchuong · commit 908df83583fd · 2025-05-13T01:00:50.000-07:00
diff --git a/src/acquisition/rvdss/database.py b/src/acquisition/rvdss/database.py
@@ -12,31 +12,11 @@
 =======================
 
 `rvdss` is the table where rvdss data is stored.
-+----------+-------------+------+-----+---------+----------------+
-| Field    | Type        | Null | Key | Default | Extra          |
-+----------+-------------+------+-----+---------+----------------+
-| id       | int(11)     | NO   | PRI | NULL    | auto_increment |
-| location | varchar(8)  | NO   | MUL | NULL    |                |
-| epiweek  | int(11)     | NO   | MUL | NULL    |                |
-| value    | float       | NO   |     | NULL    |                |
-+----------+-------------+------+-----+---------+----------------+
-id: unique identifier for each record
-location: hhs1-10
-epiweek: the epiweek during which the queries were executed
-value: number of total test records per facility, within each epiweek
-
-=================
-=== Changelog ===
-=================
-2017-12-14:
-  * add "need update" check
-
-2017-12-02:
-  * original version
 """
 
 # standard library
 import argparse
+import numpy as np
 
 # third party
 import mysql.connector
@@ -48,72 +28,156 @@
 import delphi.utils.epiweek as flu
 from delphi.utils.geo.locations import Locations
 
-LOCATIONS = Locations.hhs_list
-DATAPATH = "/home/automation/rvdss_data"
 
-
-def update(locations, first=None, last=None, force_update=False, load_email=True):
-    # download and prepare data first
-    qd = rvdss.rvdssData(DATAPATH, load_email)
-    if not qd.need_update and not force_update:
-        print("Data not updated, nothing needs change.")
-        return
-
-    qd_data = qd.load_csv()
-    qd_measurements = qd.prepare_measurements(qd_data, start_weekday=4)
-    qd_ts = rvdss.measurement_to_ts(qd_measurements, 7, startweek=first, endweek=last)
+# Column names registered under the "respiratory_detection" key of
+# `expected_columns`. `update` builds one INSERT placeholder per entry, so
+# the number of names here has to match the target table.
+respiratory_detections_cols = (
+    "epiweek", "time_value", "issue", "geo_type", "geo_value",
+    "sarscov2_tests", "sarscov2_positive_tests",
+    "flu_tests", "flu_positive_tests",
+    "fluah1n1pdm09_positive_tests", "fluah3_positive_tests",
+    "fluauns_positive_tests", "flua_positive_tests", "flub_positive_tests",
+    "rsv_tests", "rsv_positive_tests",
+    "hpiv_tests", "hpiv1_positive_tests", "hpiv2_positive_tests",
+    "hpiv3_positive_tests", "hpiv4_positive_tests", "hpivother_positive_tests",
+    "adv_tests", "adv_positive_tests",
+    "hmpv_tests", "hmpv_positive_tests",
+    "evrv_tests", "evrv_positive_tests",
+    "hcov_tests", "hcov_positive_tests",
+    "week", "weekorder", "year",
+)
+
+# Column names registered under the "positive" key of `expected_columns`.
+# `update` builds one INSERT placeholder per entry, so the number of names
+# here has to match the target table.
+# NOTE(review): there is no "adv_positive_tests" entry, unlike the other
+# groups -- confirm this mirrors the table definition.
+pct_positive_cols = (
+    "epiweek", "time_value", "issue", "geo_type", "geo_value",
+    "evrv_pct_positive", "evrv_tests", "evrv_positive_tests",
+    "hpiv_pct_positive", "hpiv_tests", "hpiv_positive_tests",
+    "adv_pct_positive", "adv_tests",
+    "hcov_pct_positive", "hcov_tests", "hcov_positive_tests",
+    "flua_pct_positive", "flub_pct_positive", "flu_tests",
+    "flua_positive_tests", "flua_tests", "flub_tests",
+    "flub_positive_tests", "flu_positive_tests", "flu_pct_positive",
+    "hmpv_pct_positive", "hmpv_tests", "hmpv_positive_tests",
+    "rsv_pct_positive", "rsv_tests", "rsv_positive_tests",
+    "sarscov2_pct_positive", "sarscov2_tests", "sarscov2_positive_tests",
+    "region", "week", "weekorder", "year",
+)
+
+# Column names registered under the "count" key of `expected_columns`.
+# `update` builds one INSERT placeholder per entry, so the number of names
+# here has to match the target table.
+detections_counts_cols = (
+    "epiweek", "time_value", "issue", "geo_type", "geo_value",
+    "hpiv_positive_tests", "adv_positive_tests", "hmpv_positive_tests",
+    "evrv_positive_tests", "hcov_positive_tests", "rsv_positive_tests",
+    "flu_positive_tests",
+)
+
+# Maps each table-type key of the incoming data dict to the database table
+# that `update` inserts into.
+# NOTE(review): "rvdss_repiratory_detections" looks like a misspelling of
+# "respiratory"; it is kept as-is because it must match the table name in
+# the database schema -- confirm against the DDL.
+expected_table_names = {
+    "respiratory_detection": "rvdss_repiratory_detections",
+    "positive": "rvdss_pct_positive",
+    "count": "rvdss_detections_counts",
+}
+
+# Maps each table-type key to the tuple of column names for that table;
+# the keys are the same ones used by `expected_table_names`.
+expected_columns = {
+    "respiratory_detection": respiratory_detections_cols,
+    "positive": pct_positive_cols,
+    "count": detections_counts_cols,
+}
+
+def get_num_rows(cursor, table_name):
+    """Return the number of rows currently stored in `table_name`.
+
+    Args:
+        cursor: an open database cursor; it is executed and then iterated.
+        table_name: name of the table to count. It is interpolated directly
+            into the SQL text, so it must be a trusted identifier (e.g. a
+            value from `expected_table_names`), never user input.
+
+    Returns:
+        The `count(1)` value from the last row the cursor yields.
+    """
+    # The f-prefix is required: without it the query targets a table that
+    # is literally named "{table_name}".
+    cursor.execute(f"SELECT count(1) `num` FROM `{table_name}`")
+    # Iterate the cursor to exhaustion so no unread rows are left behind.
+    for (num,) in cursor:
+        pass
+    return num
+
+def update(data_dict):
     # connect to the database
     u, p = secrets.db.epi
     cnx = mysql.connector.connect(user=u, password=p, database="epidata")
     cur = cnx.cursor()
 
-    def get_num_rows():
-        cur.execute("SELECT count(1) `num` FROM `rvdss`")
-        for (num,) in cur:
-            pass
-        return num
-
-    # check from 4 weeks preceeding the last week with data through this week
-    cur.execute("SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `rvdss`")
-    for (ew0, ew1) in cur:
-        ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4)
-    ew0 = ew0 if first is None else first
-    ew1 = ew1 if last is None else last
-    print(f"Checking epiweeks between {int(ew0)} and {int(ew1)}...")
-
-    # keep track of how many rows were added
-    rows_before = get_num_rows()
-
-    # check rvdss for new and/or revised data
-    sql = """
-    INSERT INTO
-      `rvdss` (`location`, `epiweek`, `value`)
-    VALUES
-      (%s, %s, %s)
-    ON DUPLICATE KEY UPDATE
-      `value` = %s
-    """
-
-    total_rows = 0
-
-    for location in locations:
-        if location not in qd_ts:
-            continue
-        ews = sorted(qd_ts[location].keys())
-        num_missing = 0
-        for ew in ews:
-            v = qd_ts[location][ew]
-            sql_data = (location, ew, v, v)
-            cur.execute(sql, sql_data)
-            total_rows += 1
-            if v == 0:
-                num_missing += 1
-        if num_missing > 0:
-            print(f" [{location}] missing {int(num_missing)}/{len(ews)} value(s)")
-
-    # keep track of how many rows were added
-    rows_after = get_num_rows()
-    print(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s)")
+
+    for tt in data_dict.keys():
+        data = data_dict[tt]
+        data_tuples = list(data.itertuples(index=False,name=None))
+        # loop through table types
+        table_name = expected_table_names[tt]
+        cols =  expected_columns[tt]
+        place_holders= ', '.join(["?" for _ in cols])
+        # field_names = ", ".join(
+        #         f"`{name}`" for name in cols)
+        
+        # check rvdss for new and/or revised data
+        # sql = f"""
+        # INSERT INTO {table_name} ({field_names})
+        # VALUES ({place_holders})
+        # """
+        
+        sql = f"""
+        INSERT INTO {table_name} 
+        VALUES ({place_holders})
+        """
+        
+        # keep track of how many rows were added
+        rows_before = get_num_rows(cur,table_name)
+        total_rows = 0
+        
+        #insert data 
+        cur.executemany(sql, data_tuples)
+        
+        # keep track of how many rows were added
+        rows_after = get_num_rows(cur,table_name)
+        print(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s)")
+    
 
     # cleanup
     cur.close()