Skip to content

Commit 908df83

Browse files
committed
remove unused code and table definition
1 parent 4e5528b commit 908df83

File tree

1 file changed

+145
-81
lines changed

1 file changed

+145
-81
lines changed

src/acquisition/rvdss/database.py

Lines changed: 145 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -12,31 +12,11 @@
1212
=======================
1313
1414
`rvdss` is the table where rvdss data is stored.
15-
+----------+-------------+------+-----+---------+----------------+
16-
| Field | Type | Null | Key | Default | Extra |
17-
+----------+-------------+------+-----+---------+----------------+
18-
| id | int(11) | NO | PRI | NULL | auto_increment |
19-
| location | varchar(8) | NO | MUL | NULL | |
20-
| epiweek | int(11) | NO | MUL | NULL | |
21-
| value | float | NO | | NULL | |
22-
+----------+-------------+------+-----+---------+----------------+
23-
id: unique identifier for each record
24-
location: hhs1-10
25-
epiweek: the epiweek during which the queries were executed
26-
value: number of total test records per facility, within each epiweek
27-
28-
=================
29-
=== Changelog ===
30-
=================
31-
2017-12-14:
32-
* add "need update" check
33-
34-
2017-12-02:
35-
* original version
3615
"""
3716

3817
# standard library
3918
import argparse
19+
import numpy as np
4020

4121
# third party
4222
import mysql.connector
@@ -48,72 +28,156 @@
4828
import delphi.utils.epiweek as flu
4929
from delphi.utils.geo.locations import Locations
5030

51-
LOCATIONS = Locations.hhs_list
52-
DATAPATH = "/home/automation/rvdss_data"
5331

54-
55-
def update(locations, first=None, last=None, force_update=False, load_email=True):
56-
# download and prepare data first
57-
qd = rvdss.rvdssData(DATAPATH, load_email)
58-
if not qd.need_update and not force_update:
59-
print("Data not updated, nothing needs change.")
60-
return
61-
62-
qd_data = qd.load_csv()
63-
qd_measurements = qd.prepare_measurements(qd_data, start_weekday=4)
64-
qd_ts = rvdss.measurement_to_ts(qd_measurements, 7, startweek=first, endweek=last)
32+
respiratory_detections_cols= (
33+
"epiweek",
34+
"time_value",
35+
"issue",
36+
"geo_type",
37+
"geo_value",
38+
"sarscov2_tests",
39+
"sarscov2_positive_tests",
40+
"flu_tests",
41+
"flu_positive_tests",
42+
"fluah1n1pdm09_positive_tests",
43+
"fluah3_positive_tests",
44+
"fluauns_positive_tests",
45+
"flua_positive_tests",
46+
"flub_positive_tests",
47+
"rsv_tests",
48+
"rsv_positive_tests",
49+
"hpiv_tests",
50+
"hpiv1_positive_tests",
51+
"hpiv2_positive_tests",
52+
"hpiv3_positive_tests",
53+
"hpiv4_positive_tests",
54+
"hpivother_positive_tests",
55+
"adv_tests",
56+
"adv_positive_tests",
57+
"hmpv_tests",
58+
"hmpv_positive_tests",
59+
"evrv_tests",
60+
"evrv_positive_tests",
61+
"hcov_tests",
62+
"hcov_positive_tests",
63+
"week",
64+
"weekorder",
65+
"year"
66+
)
67+
68+
pct_positive_cols = (
69+
"epiweek",
70+
"time_value",
71+
"issue",
72+
"geo_type",
73+
"geo_value",
74+
"evrv_pct_positive",
75+
"evrv_tests",
76+
"evrv_positive_tests",
77+
"hpiv_pct_positive",
78+
"hpiv_tests",
79+
"hpiv_positive_tests",
80+
"adv_pct_positive",
81+
"adv_tests",
82+
"hcov_pct_positive",
83+
"hcov_tests",
84+
"hcov_positive_tests",
85+
"flua_pct_positive",
86+
"flub_pct_positive",
87+
"flu_tests",
88+
"flua_positive_tests",
89+
"flua_tests",
90+
"flub_tests",
91+
"flub_positive_tests",
92+
"flu_positive_tests",
93+
"flu_pct_positive",
94+
"hmpv_pct_positive",
95+
"hmpv_tests",
96+
"hmpv_positive_tests",
97+
"rsv_pct_positive",
98+
"rsv_tests",
99+
"rsv_positive_tests",
100+
"sarscov2_pct_positive",
101+
"sarscov2_tests",
102+
"sarscov2_positive_tests",
103+
"region",
104+
"week",
105+
"weekorder",
106+
"year"
107+
)
108+
109+
detections_counts_cols = (
110+
"epiweek",
111+
"time_value",
112+
"issue" ,
113+
"geo_type",
114+
"geo_value",
115+
"hpiv_positive_tests",
116+
"adv_positive_tests",
117+
"hmpv_positive_tests",
118+
"evrv_positive_tests",
119+
"hcov_positive_tests",
120+
"rsv_positive_tests",
121+
"flu_positive_tests"
122+
)
123+
124+
expected_table_names = {
125+
"respiratory_detection":"rvdss_repiratory_detections",
126+
"positive":"rvdss_pct_positive" ,
127+
"count": "rvdss_detections_counts"
128+
}
129+
130+
expected_columns = {
131+
"respiratory_detection":respiratory_detections_cols,
132+
"positive": pct_positive_cols,
133+
"count":detections_counts_cols
134+
}
135+
136+
def get_num_rows(cursor, table_name):
137+
cursor.execute("SELECT count(1) `num` FROM `{table_name}`")
138+
for (num,) in cursor:
139+
pass
140+
return num
141+
142+
def update(data_dict):
65143
# connect to the database
66144
u, p = secrets.db.epi
67145
cnx = mysql.connector.connect(user=u, password=p, database="epidata")
68146
cur = cnx.cursor()
69147

70-
def get_num_rows():
71-
cur.execute("SELECT count(1) `num` FROM `rvdss`")
72-
for (num,) in cur:
73-
pass
74-
return num
75-
76-
# check from 4 weeks preceeding the last week with data through this week
77-
cur.execute("SELECT max(`epiweek`) `ew0`, yearweek(now(), 6) `ew1` FROM `rvdss`")
78-
for (ew0, ew1) in cur:
79-
ew0 = 200401 if ew0 is None else flu.add_epiweeks(ew0, -4)
80-
ew0 = ew0 if first is None else first
81-
ew1 = ew1 if last is None else last
82-
print(f"Checking epiweeks between {int(ew0)} and {int(ew1)}...")
83-
84-
# keep track of how many rows were added
85-
rows_before = get_num_rows()
86-
87-
# check rvdss for new and/or revised data
88-
sql = """
89-
INSERT INTO
90-
`rvdss` (`location`, `epiweek`, `value`)
91-
VALUES
92-
(%s, %s, %s)
93-
ON DUPLICATE KEY UPDATE
94-
`value` = %s
95-
"""
96-
97-
total_rows = 0
98-
99-
for location in locations:
100-
if location not in qd_ts:
101-
continue
102-
ews = sorted(qd_ts[location].keys())
103-
num_missing = 0
104-
for ew in ews:
105-
v = qd_ts[location][ew]
106-
sql_data = (location, ew, v, v)
107-
cur.execute(sql, sql_data)
108-
total_rows += 1
109-
if v == 0:
110-
num_missing += 1
111-
if num_missing > 0:
112-
print(f" [{location}] missing {int(num_missing)}/{len(ews)} value(s)")
113-
114-
# keep track of how many rows were added
115-
rows_after = get_num_rows()
116-
print(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s)")
148+
149+
for tt in data_dict.keys():
150+
data = data_dict[tt]
151+
data_tuples = list(data.itertuples(index=False,name=None))
152+
# loop though table types
153+
table_name = expected_table_names[tt]
154+
cols = expected_columns[tt]
155+
place_holders= ', '.join(["?" for _ in cols])
156+
# field_names = ", ".join(
157+
# f"`{name}`" for name in cols)
158+
159+
# check rvdss for new and/or revised data
160+
# sql = f"""
161+
# INSERT INTO {table_name} ({field_names})
162+
# VALUES ({place_holders})
163+
# """
164+
165+
sql = f"""
166+
INSERT INTO {table_name}
167+
VALUES ({place_holders})
168+
"""
169+
170+
# keep track of how many rows were added
171+
rows_before = get_num_rows(cur,table_name)
172+
total_rows = 0
173+
174+
#insert data
175+
cur.executemany(sql, data_tuples)
176+
177+
# keep track of how many rows were added
178+
rows_after = get_num_rows(cur,table_name)
179+
print(f"Inserted {int(rows_after - rows_before)}/{int(total_rows)} row(s)")
180+
117181

118182
# cleanup
119183
cur.close()

0 commit comments

Comments
 (0)