|
12 | 12 | =======================
|
13 | 13 |
|
14 | 14 | `rvdss` is the table where rvdss data is stored.
|
15 |
| -+----------+-------------+------+-----+---------+----------------+ |
16 |
| -| Field | Type | Null | Key | Default | Extra | |
17 |
| -+----------+-------------+------+-----+---------+----------------+ |
18 |
| -| id | int(11) | NO | PRI | NULL | auto_increment | |
19 |
| -| location | varchar(8) | NO | MUL | NULL | | |
20 |
| -| epiweek | int(11) | NO | MUL | NULL | | |
21 |
| -| value | float | NO | | NULL | | |
22 |
| -+----------+-------------+------+-----+---------+----------------+ |
23 |
| -id: unique identifier for each record |
24 |
| -location: hhs1-10 |
25 |
| -epiweek: the epiweek during which the queries were executed |
26 |
| -value: number of total test records per facility, within each epiweek |
27 |
| -
|
28 |
| -================= |
29 |
| -=== Changelog === |
30 |
| -================= |
31 |
| -2017-12-14: |
32 |
| - * add "need update" check |
33 |
| -
|
34 |
| -2017-12-02: |
35 |
| - * original version |
36 | 15 | """
|
37 | 16 |
|
38 | 17 | # standard library
|
39 | 18 | import argparse
|
| 19 | +import numpy as np |
40 | 20 |
|
41 | 21 | # third party
|
42 | 22 | import mysql.connector
|
|
48 | 28 | import delphi.utils.epiweek as flu
|
49 | 29 | from delphi.utils.geo.locations import Locations
|
50 | 30 |
|
51 |
| -LOCATIONS = Locations.hhs_list |
52 |
| -DATAPATH = "/home/automation/rvdss_data" |
53 | 31 |
|
54 |
| - |
55 |
| -def update(locations, first=None, last=None, force_update=False, load_email=True): |
56 |
| - # download and prepare data first |
57 |
| - qd = rvdss.rvdssData(DATAPATH, load_email) |
58 |
| - if not qd.need_update and not force_update: |
59 |
| - print("Data not updated, nothing needs change.") |
60 |
| - return |
61 |
| - |
62 |
| - qd_data = qd.load_csv() |
63 |
| - qd_measurements = qd.prepare_measurements(qd_data, start_weekday=4) |
64 |
| - qd_ts = rvdss.measurement_to_ts(qd_measurements, 7, startweek=first, endweek=last) |
# Column tuples for each RVDSS table. Order matters: these define the field
# list (and therefore the value order) used when inserting rows, so do not
# reorder entries without also reordering the data fed to `update()`.

# Columns for the respiratory-detections table: epiweek/issue metadata,
# geography, then per-pathogen test and positive-test counts.
respiratory_detections_cols = (
    "epiweek",
    "time_value",
    "issue",
    "geo_type",
    "geo_value",
    "sarscov2_tests",
    "sarscov2_positive_tests",
    "flu_tests",
    "flu_positive_tests",
    "fluah1n1pdm09_positive_tests",
    "fluah3_positive_tests",
    "fluauns_positive_tests",
    "flua_positive_tests",
    "flub_positive_tests",
    "rsv_tests",
    "rsv_positive_tests",
    "hpiv_tests",
    "hpiv1_positive_tests",
    "hpiv2_positive_tests",
    "hpiv3_positive_tests",
    "hpiv4_positive_tests",
    "hpivother_positive_tests",
    "adv_tests",
    "adv_positive_tests",
    "hmpv_tests",
    "hmpv_positive_tests",
    "evrv_tests",
    "evrv_positive_tests",
    "hcov_tests",
    "hcov_positive_tests",
    "week",
    "weekorder",
    "year"
)

# Columns for the percent-positive table: per-pathogen percent positive
# plus the underlying test/positive counts, and region metadata.
pct_positive_cols = (
    "epiweek",
    "time_value",
    "issue",
    "geo_type",
    "geo_value",
    "evrv_pct_positive",
    "evrv_tests",
    "evrv_positive_tests",
    "hpiv_pct_positive",
    "hpiv_tests",
    "hpiv_positive_tests",
    "adv_pct_positive",
    "adv_tests",
    "hcov_pct_positive",
    "hcov_tests",
    "hcov_positive_tests",
    "flua_pct_positive",
    "flub_pct_positive",
    "flu_tests",
    "flua_positive_tests",
    "flua_tests",
    "flub_tests",
    "flub_positive_tests",
    "flu_positive_tests",
    "flu_pct_positive",
    "hmpv_pct_positive",
    "hmpv_tests",
    "hmpv_positive_tests",
    "rsv_pct_positive",
    "rsv_tests",
    "rsv_positive_tests",
    "sarscov2_pct_positive",
    "sarscov2_tests",
    "sarscov2_positive_tests",
    "region",
    "week",
    "weekorder",
    "year"
)

# Columns for the detection-counts table: positive-test counts only.
detections_counts_cols = (
    "epiweek",
    "time_value",
    "issue",
    "geo_type",
    "geo_value",
    "hpiv_positive_tests",
    "adv_positive_tests",
    "hmpv_positive_tests",
    "evrv_positive_tests",
    "hcov_positive_tests",
    "rsv_positive_tests",
    "flu_positive_tests"
)

# Maps a table-type key (as used in the dict passed to `update()`) to the
# physical MySQL table name.
# NOTE(review): "rvdss_repiratory_detections" looks like a misspelling of
# "respiratory", but it is a live table name — confirm against the actual
# database schema before changing it.
expected_table_names = {
    "respiratory_detection": "rvdss_repiratory_detections",
    "positive": "rvdss_pct_positive",
    "count": "rvdss_detections_counts"
}

# Maps the same table-type keys to the column tuple expected for that table.
expected_columns = {
    "respiratory_detection": respiratory_detections_cols,
    "positive": pct_positive_cols,
    "count": detections_counts_cols
}
| 135 | + |
def get_num_rows(cursor, table_name):
    """Return the current number of rows in `table_name`.

    Args:
        cursor: an open database cursor; the single-row result is consumed
            by iterating the cursor.
        table_name: name of the table to count rows in.

    Returns:
        The row count reported by the database.
    """
    # Bug fix: the original query string was missing the f-prefix, so the
    # literal text "{table_name}" was sent to the database instead of the
    # table name. Table names cannot be bound as query parameters, so the
    # name is interpolated directly; it must come from a trusted source
    # (here: the hard-coded `expected_table_names` mapping).
    cursor.execute(f"SELECT count(1) `num` FROM `{table_name}`")
    for (num,) in cursor:
        pass
    return num
def update(data_dict):
    """Insert RVDSS data into the epidata database.

    Args:
        data_dict: mapping from table-type key ("respiratory_detection",
            "positive", or "count") to a pandas DataFrame whose columns
            include the names listed in `expected_columns` for that key.
    """
    # connect to the database
    u, p = secrets.db.epi
    cnx = mysql.connector.connect(user=u, password=p, database="epidata")
    cur = cnx.cursor()

    # loop through table types
    for table_type, data in data_dict.items():
        table_name = expected_table_names[table_type]
        cols = expected_columns[table_type]

        # Align the DataFrame to the expected column order so the value
        # tuples line up with the named fields in the INSERT statement.
        data_tuples = list(data[list(cols)].itertuples(index=False, name=None))

        # Bug fix: mysql.connector uses the %s paramstyle; the original "?"
        # placeholders (qmark style, e.g. sqlite3) would raise a
        # ProgrammingError at execute time.
        place_holders = ", ".join(["%s"] * len(cols))

        # Name the target columns explicitly so the insert does not depend
        # on the physical column order of the table.
        field_names = ", ".join(f"`{name}`" for name in cols)

        sql = f"""
        INSERT INTO `{table_name}` ({field_names})
        VALUES ({place_holders})
        """

        # keep track of how many rows were added
        rows_before = get_num_rows(cur, table_name)
        # Bug fix: total_rows was initialized to 0 and never incremented, so
        # the summary line always reported "Inserted N/0 row(s)".
        total_rows = len(data_tuples)

        # insert data
        cur.executemany(sql, data_tuples)

        # keep track of how many rows were added
        rows_after = get_num_rows(cur, table_name)
        print(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s)")

    # cleanup
    cur.close()
|
|
0 commit comments