Skip to content

Commit 183187a

Browse files
dshemetov, krivard, and melange396
committed
CovidcastRow: address code review
Co-authored-by: Katie Mazaitis <[email protected]>
Co-authored-by: melange396 <[email protected]>
1 parent 4b54dd6 commit 183187a

File tree

11 files changed

+295
-321
lines changed

11 files changed

+295
-321
lines changed

integrations/acquisition/covidcast/test_db.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,7 @@
11
from delphi_utils import Nans
22

33
from delphi.epidata.acquisition.covidcast.database import DBLoadStateException
4-
from delphi.epidata.acquisition.covidcast.covidcast_row import CovidcastRow
5-
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase
4+
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
65

76

87
# all the Nans we use here are just one value, so this is a shortcut to it:
@@ -11,7 +10,7 @@
1110
class TestTest(CovidcastBase):
1211

1312
def _find_matches_for_row(self, row):
14-
# finds (if existing) row from both history and latest views that matches long-key of provided CovidcastRow
13+
# finds (if existing) row from both history and latest views that matches long-key of provided CovidcastTestRow
1514
cols = "source signal time_type time_value geo_type geo_value issue".split()
1615
results = {}
1716
cur = self._db._cursor
@@ -31,8 +30,8 @@ def _find_matches_for_row(self, row):
3130

3231
def test_insert_or_update_with_nonempty_load_table(self):
3332
# make rows
34-
a_row = CovidcastRow.make_default_row(time_value=2020_02_02)
35-
another_row = CovidcastRow.make_default_row(time_value=2020_02_03, issue=2020_02_03)
33+
a_row = CovidcastTestRow.make_default_row(time_value=2020_02_02)
34+
another_row = CovidcastTestRow.make_default_row(time_value=2020_02_03, issue=2020_02_03)
3635
# insert one
3736
self._db.insert_or_update_bulk([a_row])
3837
# put something into the load table
@@ -61,7 +60,7 @@ def test_id_sync(self):
6160
latest_view = 'epimetric_latest_v'
6261

6362
# add a data point
64-
base_row = CovidcastRow.make_default_row()
63+
base_row = CovidcastTestRow.make_default_row()
6564
self._insert_rows([base_row])
6665
# ensure the primary keys match in the latest and history tables
6766
matches = self._find_matches_for_row(base_row)
@@ -71,7 +70,7 @@ def test_id_sync(self):
7170
old_pk_id = matches[latest_view][pk_column]
7271

7372
# add a reissue for said data point
74-
next_row = CovidcastRow.make_default_row()
73+
next_row = CovidcastTestRow.make_default_row()
7574
next_row.issue += 1
7675
self._insert_rows([next_row])
7776
# ensure the new keys also match

integrations/acquisition/covidcast/test_delete_batch.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
# first party
99
import delphi.operations.secrets as secrets
1010
from delphi.epidata.acquisition.covidcast.database import Database
11-
from delphi.epidata.acquisition.covidcast.covidcast_row import CovidcastRow, covidcast_rows_from_args
11+
from delphi.epidata.acquisition.covidcast.test_utils import covidcast_rows_from_args
1212

1313
# py3tester coverage target (equivalent to `import *`)
1414
__test_target__ = 'delphi.epidata.acquisition.covidcast.database'
@@ -57,6 +57,7 @@ def _test_delete_batch(self, cc_deletions):
5757
time_value = [0] * 5 + [1] * 5 + [0],
5858
geo_value = ["d_nonlatest"] * 2 + ["d_latest"] * 3 + ["d_nonlatest"] * 2 + ["d_latest"] * 3 + ["d_justone"],
5959
issue = [1, 2] + [1, 2, 3] + [1, 2] + [1, 2, 3] + [1],
60+
sanitize_fields = True
6061
)
6162

6263
self._db.insert_or_update_bulk(rows)

integrations/client/test_delphi_epidata.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@
1212
# third party
1313
import delphi.operations.secrets as secrets
1414
from delphi.epidata.acquisition.covidcast.covidcast_meta_cache_updater import main as update_covidcast_meta_cache
15-
from delphi.epidata.acquisition.covidcast.covidcast_row import CovidcastRow
16-
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase
15+
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
1716
from delphi.epidata.client.delphi_epidata import Epidata
1817
from delphi_utils import Nans
1918

@@ -52,13 +51,11 @@ def test_covidcast(self):
5251

5352
# insert placeholder data: three issues of one signal, one issue of another
5453
rows = [
55-
CovidcastRow.make_default_row(issue=2020_02_02 + i, value=i, lag=i)
54+
CovidcastTestRow.make_default_row(issue=2020_02_02 + i, value=i, lag=i)
5655
for i in range(3)
5756
]
5857
row_latest_issue = rows[-1]
59-
rows.append(
60-
CovidcastRow.make_default_row(signal="sig2")
61-
)
58+
rows.append(CovidcastTestRow.make_default_row(signal="sig2"))
6259
self._insert_rows(rows)
6360

6461
with self.subTest(name='request two signals'):
@@ -222,10 +219,10 @@ def test_geo_value(self):
222219
# insert placeholder data: three counties, three MSAs
223220
N = 3
224221
rows = [
225-
CovidcastRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i)
222+
CovidcastTestRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i)
226223
for i in range(N)
227224
] + [
228-
CovidcastRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10)
225+
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10)
229226
for i in range(N)
230227
]
231228
self._insert_rows(rows)
@@ -277,7 +274,11 @@ def test_covidcast_meta(self):
277274
# 2nd issue: 1 11 21
278275
# 3rd issue: 2 12 22
279276
rows = [
280-
CovidcastRow.make_default_row(time_value=DEFAULT_TIME_VALUE + t, issue=DEFAULT_ISSUE + i, value=t*10 + i)
277+
CovidcastTestRow.make_default_row(
278+
time_value=DEFAULT_TIME_VALUE + t,
279+
issue=DEFAULT_ISSUE + i,
280+
value=t*10 + i
281+
)
281282
for i in range(3) for t in range(3)
282283
]
283284
self._insert_rows(rows)
@@ -324,10 +325,10 @@ def test_async_epidata(self):
324325
# insert placeholder data: three counties, three MSAs
325326
N = 3
326327
rows = [
327-
CovidcastRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i)
328+
CovidcastTestRow.make_default_row(geo_type="county", geo_value=str(i)*5, value=i)
328329
for i in range(N)
329330
] + [
330-
CovidcastRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10)
331+
CovidcastTestRow.make_default_row(geo_type="msa", geo_value=str(i)*5, value=i*10)
331332
for i in range(N)
332333
]
333334
self._insert_rows(rows)

integrations/server/test_covidcast.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010

1111
# first party
1212
from delphi_utils import Nans
13-
from delphi.epidata.acquisition.covidcast.covidcast_row import CovidcastRow
14-
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase
13+
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
1514

1615
# use the local instance of the Epidata API
1716
BASE_URL = 'http://delphi_web_epidata/epidata/api.php'
@@ -23,7 +22,7 @@ def localSetUp(self):
2322
"""Perform per-test setup."""
2423
self._db._cursor.execute('update covidcast_meta_cache set timestamp = 0, epidata = "[]"')
2524

26-
def request_based_on_row(self, row: CovidcastRow, extract_response: Callable = lambda x: x.json(), **kwargs):
25+
def request_based_on_row(self, row: CovidcastTestRow, extract_response: Callable = lambda x: x.json(), **kwargs):
2726
params = self.params_from_row(row, endpoint='covidcast', **kwargs)
2827
response = requests.get(BASE_URL, params=params)
2928
response.raise_for_status()
@@ -32,53 +31,53 @@ def request_based_on_row(self, row: CovidcastRow, extract_response: Callable = l
3231
return response
3332

3433
def _insert_placeholder_set_one(self):
35-
row = CovidcastRow.make_default_row()
34+
row = CovidcastTestRow.make_default_row()
3635
self._insert_rows([row])
3736
return row
3837

3938
def _insert_placeholder_set_two(self):
4039
rows = [
41-
CovidcastRow.make_default_row(geo_type='county', geo_value=str(i)*5, value=i*1., stderr=i*10., sample_size=i*100.)
40+
CovidcastTestRow.make_default_row(geo_type='county', geo_value=str(i)*5, value=i*1., stderr=i*10., sample_size=i*100.)
4241
for i in [1, 2, 3]
4342
] + [
4443
# geo value intended to overlap with counties above
45-
CovidcastRow.make_default_row(geo_type='msa', geo_value=str(i-3)*5, value=i*1., stderr=i*10., sample_size=i*100.)
44+
CovidcastTestRow.make_default_row(geo_type='msa', geo_value=str(i-3)*5, value=i*1., stderr=i*10., sample_size=i*100.)
4645
for i in [4, 5, 6]
4746
]
4847
self._insert_rows(rows)
4948
return rows
5049

5150
def _insert_placeholder_set_three(self):
5251
rows = [
53-
CovidcastRow.make_default_row(geo_type='county', geo_value='11111', time_value=2000_01_01+i, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=2-i)
52+
CovidcastTestRow.make_default_row(geo_type='county', geo_value='11111', time_value=2000_01_01+i, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=2-i)
5453
for i in [1, 2, 3]
5554
] + [
5655
# time value intended to overlap with 11111 above, with disjoint geo values
57-
CovidcastRow.make_default_row(geo_type='county', geo_value=str(i)*5, time_value=2000_01_01+i-3, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=5-i)
56+
CovidcastTestRow.make_default_row(geo_type='county', geo_value=str(i)*5, time_value=2000_01_01+i-3, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03, lag=5-i)
5857
for i in [4, 5, 6]
5958
]
6059
self._insert_rows(rows)
6160
return rows
6261

6362
def _insert_placeholder_set_four(self):
6463
rows = [
65-
CovidcastRow.make_default_row(source='src1', signal=str(i)*5, value=i*1., stderr=i*10., sample_size=i*100.)
64+
CovidcastTestRow.make_default_row(source='src1', signal=str(i)*5, value=i*1., stderr=i*10., sample_size=i*100.)
6665
for i in [1, 2, 3]
6766
] + [
6867
# signal intended to overlap with the signal above
69-
CovidcastRow.make_default_row(source='src2', signal=str(i-3)*5, value=i*1., stderr=i*10., sample_size=i*100.)
68+
CovidcastTestRow.make_default_row(source='src2', signal=str(i-3)*5, value=i*1., stderr=i*10., sample_size=i*100.)
7069
for i in [4, 5, 6]
7170
]
7271
self._insert_rows(rows)
7372
return rows
7473

7574
def _insert_placeholder_set_five(self):
7675
rows = [
77-
CovidcastRow.make_default_row(time_value=2000_01_01, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03+i)
76+
CovidcastTestRow.make_default_row(time_value=2000_01_01, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03+i)
7877
for i in [1, 2, 3]
7978
] + [
8079
# different time_values, same issues
81-
CovidcastRow.make_default_row(time_value=2000_01_01+i-3, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03+i-3)
80+
CovidcastTestRow.make_default_row(time_value=2000_01_01+i-3, value=i*1., stderr=i*10., sample_size=i*100., issue=2000_01_03+i-3)
8281
for i in [4, 5, 6]
8382
]
8483
self._insert_rows(rows)
@@ -366,7 +365,7 @@ def test_unique_key_constraint(self):
366365
def test_nullable_columns(self):
367366
"""Missing values should be surfaced as null."""
368367

369-
row = CovidcastRow.make_default_row(
368+
row = CovidcastTestRow.make_default_row(
370369
stderr=None, sample_size=None,
371370
missing_stderr=Nans.OTHER.value, missing_sample_size=Nans.OTHER.value
372371
)
@@ -388,7 +387,7 @@ def test_temporal_partitioning(self):
388387

389388
# insert placeholder data
390389
rows = [
391-
CovidcastRow.make_default_row(time_type=tt)
390+
CovidcastTestRow.make_default_row(time_type=tt)
392391
for tt in "hour day week month year".split()
393392
]
394393
self._insert_rows(rows)

integrations/server/test_covidcast_endpoints.py

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,16 @@
11
"""Integration tests for the custom `covidcast/*` endpoints."""
22

33
# standard library
4-
from copy import copy
5-
from itertools import accumulate, chain
6-
from typing import Iterable, Dict, Any, List, Sequence
7-
import unittest
84
from io import StringIO
9-
10-
# from typing import Optional
11-
from dataclasses import dataclass
5+
from typing import Sequence
126

137
# third party
14-
import mysql.connector
15-
from more_itertools import interleave_longest, windowed
8+
from more_itertools import windowed
169
import requests
1710
import pandas as pd
18-
import numpy as np
19-
from delphi_utils import Nans
2011

2112
from delphi.epidata.acquisition.covidcast.covidcast_meta_cache_updater import main as update_cache
22-
from delphi.epidata.acquisition.covidcast.covidcast_row import CovidcastRow
23-
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase
13+
from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow
2414

2515
# use the local instance of the Epidata API
2616
BASE_URL = "http://delphi_web_epidata/epidata/covidcast"
@@ -64,7 +54,7 @@ def _smooth_rows(self, rows: Sequence[float]):
6454

6555
def test_basic(self):
6656
"""Request a signal from the / endpoint."""
67-
rows = [CovidcastRow.make_default_row(time_value=2020_04_01 + i, value=i) for i in range(10)]
57+
rows = [CovidcastTestRow.make_default_row(time_value=2020_04_01 + i, value=i) for i in range(10)]
6858
first = rows[0]
6959
self._insert_rows(rows)
7060

@@ -78,7 +68,7 @@ def test_basic(self):
7868

7969
def test_compatibility(self):
8070
"""Request at the /api.php endpoint."""
81-
rows = [CovidcastRow.make_default_row(source="src", signal="sig", time_value=2020_04_01 + i, value=i) for i in range(10)]
71+
rows = [CovidcastTestRow.make_default_row(source="src", signal="sig", time_value=2020_04_01 + i, value=i) for i in range(10)]
8272
first = rows[0]
8373
self._insert_rows(rows)
8474

@@ -94,7 +84,7 @@ def test_trend(self):
9484
"""Request a signal from the /trend endpoint."""
9585

9686
num_rows = 30
97-
rows = [CovidcastRow.make_default_row(time_value=2020_04_01 + i, value=i) for i in range(num_rows)]
87+
rows = [CovidcastTestRow.make_default_row(time_value=2020_04_01 + i, value=i) for i in range(num_rows)]
9888
first = rows[0]
9989
last = rows[-1]
10090
ref = rows[num_rows // 2]
@@ -130,7 +120,7 @@ def test_trendseries(self):
130120
"""Request a signal from the /trendseries endpoint."""
131121

132122
num_rows = 3
133-
rows = [CovidcastRow.make_default_row(time_value=2020_04_01 + i, value=num_rows - i) for i in range(num_rows)]
123+
rows = [CovidcastTestRow.make_default_row(time_value=2020_04_01 + i, value=num_rows - i) for i in range(num_rows)]
134124
first = rows[0]
135125
last = rows[-1]
136126
self._insert_rows(rows)
@@ -196,10 +186,10 @@ def test_correlation(self):
196186
"""Request a signal from the /correlation endpoint."""
197187

198188
num_rows = 30
199-
reference_rows = [CovidcastRow.make_default_row(signal="ref", time_value=20200401 + i, value=i) for i in range(num_rows)]
189+
reference_rows = [CovidcastTestRow.make_default_row(signal="ref", time_value=20200401 + i, value=i) for i in range(num_rows)]
200190
first = reference_rows[0]
201191
self._insert_rows(reference_rows)
202-
other_rows = [CovidcastRow.make_default_row(signal="other", time_value=20200401 + i, value=i) for i in range(num_rows)]
192+
other_rows = [CovidcastTestRow.make_default_row(signal="other", time_value=20200401 + i, value=i) for i in range(num_rows)]
203193
other = other_rows[0]
204194
self._insert_rows(other_rows)
205195
max_lag = 3
@@ -222,23 +212,29 @@ def test_correlation(self):
222212
def test_csv(self):
223213
"""Request a signal from the /csv endpoint."""
224214

225-
rows = [CovidcastRow.make_default_row(time_value=2020_04_01 + i, value=i) for i in range(10)]
215+
rows = [CovidcastTestRow.make_default_row(time_value=2020_04_01 + i, value=i) for i in range(10)]
226216
first = rows[0]
227217
self._insert_rows(rows)
228218

229219
response = requests.get(
230220
f"{BASE_URL}/csv",
231221
params=dict(signal=first.signal_pair(), start_day="2020-04-01", end_day="2020-12-12", geo_type=first.geo_type),
232222
)
223+
response.raise_for_status()
224+
out = response.text
225+
df = pd.read_csv(StringIO(out), index_col=0)
226+
self.assertEqual(df.shape, (len(rows), 10))
227+
self.assertEqual(list(df.columns), ["geo_value", "signal", "time_value", "issue", "lag", "value", "stderr", "sample_size", "geo_type", "data_source"])
228+
233229

234230
def test_backfill(self):
235231
"""Request a signal from the /backfill endpoint."""
236232

237233
TEST_DATE_VALUE = 2020_04_01
238234
num_rows = 10
239-
issue_0 = [CovidcastRow.make_default_row(time_value=TEST_DATE_VALUE + i, value=i, sample_size=1, lag=0, issue=TEST_DATE_VALUE + i) for i in range(num_rows)]
240-
issue_1 = [CovidcastRow.make_default_row(time_value=TEST_DATE_VALUE + i, value=i + 1, sample_size=2, lag=1, issue=TEST_DATE_VALUE + i + 1) for i in range(num_rows)]
241-
last_issue = [CovidcastRow.make_default_row(time_value=TEST_DATE_VALUE + i, value=i + 2, sample_size=3, lag=2, issue=TEST_DATE_VALUE + i + 2) for i in range(num_rows)] # <-- the latest issues
235+
issue_0 = [CovidcastTestRow.make_default_row(time_value=TEST_DATE_VALUE + i, value=i, sample_size=1, lag=0, issue=TEST_DATE_VALUE + i) for i in range(num_rows)]
236+
issue_1 = [CovidcastTestRow.make_default_row(time_value=TEST_DATE_VALUE + i, value=i + 1, sample_size=2, lag=1, issue=TEST_DATE_VALUE + i + 1) for i in range(num_rows)]
237+
last_issue = [CovidcastTestRow.make_default_row(time_value=TEST_DATE_VALUE + i, value=i + 2, sample_size=3, lag=2, issue=TEST_DATE_VALUE + i + 2) for i in range(num_rows)] # <-- the latest issues
242238
self._insert_rows([*issue_0, *issue_1, *last_issue])
243239
first = issue_0[0]
244240

@@ -264,7 +260,7 @@ def test_meta(self):
264260
"""Request a signal from the /meta endpoint."""
265261

266262
num_rows = 10
267-
rows = [CovidcastRow.make_default_row(time_value=2020_04_01 + i, value=i, source="fb-survey", signal="smoothed_cli") for i in range(num_rows)]
263+
rows = [CovidcastTestRow.make_default_row(time_value=2020_04_01 + i, value=i, source="fb-survey", signal="smoothed_cli") for i in range(num_rows)]
268264
self._insert_rows(rows)
269265
first = rows[0]
270266
last = rows[-1]
@@ -305,7 +301,7 @@ def test_coverage(self):
305301

306302
num_geos_per_date = [10, 20, 30, 40, 44]
307303
dates = [2020_04_01 + i for i in range(len(num_geos_per_date))]
308-
rows = [CovidcastRow.make_default_row(time_value=dates[i], value=i, geo_value=str(geo_value)) for i, num_geo in enumerate(num_geos_per_date) for geo_value in range(num_geo)]
304+
rows = [CovidcastTestRow.make_default_row(time_value=dates[i], value=i, geo_value=str(geo_value)) for i, num_geo in enumerate(num_geos_per_date) for geo_value in range(num_geo)]
309305
self._insert_rows(rows)
310306
first = rows[0]
311307

0 commit comments

Comments
 (0)