Skip to content

Commit ee5c877

Browse files
authored
Merge pull request #51 from Imageomics/feature/long
Recognize both `long` and `lon` as the longitude column; also remove case-sensitivity by normalizing all column names to their capitalized form.
2 parents c14bd04 + 71cfc40 commit ee5c877

11 files changed

+81
-54
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@ For full dashboard functionality, upload a CSV or XLS file with the following co
1212
- `Sex`: Sex of each sample.
1313
- `hybrid_stat`: Hybrid status of each sample (e.g., 'valid_subspecies', 'subspecies_synonym', or 'unknown').
1414
- `lat`*: Latitude at which image was taken or specimen was collected: number in [-90,90].
15-
- `lon`*: Longitude at which image was taken or specimen was collected: number in [-180,180].
15+
- `lon`*: Longitude at which image was taken or specimen was collected: number in [-180,180]. `long` will also be accepted.
1616
- `file_url`*: URL to access file.
1717

1818
***Note:**
19+
- Column names are **not** case-sensitive.
1920
- `lat` and `lon` columns are not required to utilize the dashboard, but there will be no map view if they are not included. Blank (or null) entries are recorded as `unknown`, and thus excluded from map view.
2021
- `Image_filename` and `file_url` are not required, but there will be no sample images option if either one is not included.
2122
- `locality` may be provided; otherwise, it will take on the value `lat|lon`, or `unknown` if `lat` and `lon` are not provided.

components/divs.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
{'label': 'Subspecies', 'value': 'Subspecies'},
1919
{'label':'View', 'value': 'View'},
2020
{'label': 'Sex', 'value': 'Sex'},
21-
{'label': 'Hybrid Status', 'value':'hybrid_stat'},
22-
{'label': 'Locality', 'value': 'locality'}
21+
{'label': 'Hybrid Status', 'value':'Hybrid_stat'},
22+
{'label': 'Locality', 'value': 'Locality'}
2323
]
2424
DOCS_URL = "https://github.com/Imageomics/dashboard-prototype#how-it-works"
2525
DOCS_LINK = html.A("documentation",
@@ -196,8 +196,8 @@ def get_img_div(df, all_species, img_url):
196196
style = QUARTER_DIV_STYLE
197197
),
198198
html.Div([
199-
dcc.Checklist(df.hybrid_stat.unique(),
200-
df.hybrid_stat.unique()[0:2],
199+
dcc.Checklist(df.Hybrid_stat.unique(),
200+
df.Hybrid_stat.unique()[0:2],
201201
id = 'hybrid?')],
202202
style = QUARTER_DIV_STYLE
203203
),

components/graphs.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ def make_map(df, color_by):
5454
# only use entries that have valid lat & lon for mapping
5555
df = df.loc[df['lat-lon'].str.contains('unknown') == False]
5656
fig = px.scatter_mapbox(df,
57-
lat = "lat",
58-
lon = "lon",
57+
lat = "Lat",
58+
lon = "Lon",
5959
#projection = "natural earth",
6060
custom_data = ["Samples_at_locality", "Species_at_locality", "Subspecies_at_locality"],
6161
size = "Samples_at_locality",
@@ -64,8 +64,8 @@ def make_map(df, color_by):
6464
title = "Distribution of Samples")
6565

6666
fig.update_traces(hovertemplate =
67-
"Latitude: %{lat}<br>"+
68-
"Longitude: %{lon}<br>" +
67+
"Latitude: %{Lat}<br>"+
68+
"Longitude: %{Lon}<br>" +
6969
"Samples at lat/lon: %{customdata[0]}<br>" +
7070
"Species at lat/lon: %{customdata[1]}<br>" +
7171
"Subspecies at lat/lon: %{customdata[2]}<br>"

components/query.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def get_data(df, mapping, features):
1717
df - DataFrame of the data to visualize.
1818
mapping - Boolean. True when lat/lon are given in dataset.
1919
features - List of features (columns) included in the DataFrame. This is a subset of the suggested columns:
20-
'Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename'
20+
'Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename'
2121
2222
Returns:
2323
--------
@@ -29,24 +29,24 @@ def get_data(df, mapping, features):
2929
# Will likely choose to calculate and return this in later instance
3030
cat_list = [{'label': 'Species', 'value': 'Species'},
3131
{'label': 'Subspecies', 'value': 'Subspecies'},
32-
{'label':'View', 'value': 'View'},
32+
{'label': 'View', 'value': 'View'},
3333
{'label': 'Sex', 'value': 'Sex'},
34-
{'label': 'Hybrid Status', 'value':'hybrid_stat'},
35-
{'label': 'Locality', 'value': 'locality'}
34+
{'label': 'Hybrid Status', 'value':'Hybrid_stat'},
35+
{'label': 'Locality', 'value': 'Locality'}
3636
]
3737

3838
df = df.copy()
3939
df = df.fillna('unknown')
40-
features.append('locality')
40+
features.append('Locality')
4141

4242
# If we don't have lat/lon, just return DataFrame with otherwise required features.
4343
if not mapping:
44-
if 'locality' not in df.columns:
45-
df['locality'] = 'unknown'
44+
if 'Locality' not in df.columns:
45+
df['Locality'] = 'unknown'
4646
return df[features], cat_list
4747

4848
# else lat and lon are in dataset, so process locality information
49-
df['lat-lon'] = df['lat'].astype(str) + '|' + df['lon'].astype(str)
49+
df['lat-lon'] = df['Lat'].astype(str) + '|' + df['Lon'].astype(str)
5050
df["Samples_at_locality"] = df['lat-lon'].map(df['lat-lon'].value_counts()) # will duplicate if multiple views of same sample
5151

5252
# Count and record number of species and subspecies at each lat-lon
@@ -56,8 +56,8 @@ def get_data(df, mapping, features):
5656
df.loc[df['lat-lon'] == lat_lon, "Species_at_locality"] = ", ".join(species_list)
5757
df.loc[df['lat-lon'] == lat_lon, "Subspecies_at_locality"] = ", ".join(subspecies_list)
5858

59-
if 'locality' not in df.columns:
60-
df['locality'] = df['lat-lon'] # contains "unknown" if lat or lon null
59+
if 'Locality' not in df.columns:
60+
df['Locality'] = df['lat-lon'] # contains "unknown" if lat or lon null
6161

6262
new_features = ['lat-lon', "Samples_at_locality", "Species_at_locality", "Subspecies_at_locality"]
6363
for feature in new_features:
@@ -157,7 +157,7 @@ def get_filenames(df, subspecies, view, sex, hybrid, num_images):
157157
df_sub = df.loc[df.Subspecies.isin(subspecies)].copy()
158158
df_sub = df_sub.loc[df_sub.View.isin(view)]
159159
df_sub = df_sub.loc[df_sub.Sex.isin(sex)]
160-
df_sub = df_sub.loc[df_sub.hybrid_stat.isin(hybrid)]
160+
df_sub = df_sub.loc[df_sub.Hybrid_stat.isin(hybrid)]
161161

162162
num_entries = len(df_sub)
163163
# Filter out any entries that have missing filenames or URLs:

dashboard.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,21 @@ def parse_contents(contents, filename):
8181
# If no image urls, disable sample image options
8282
mapping = True
8383
img_urls = True
84-
features = ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename']
84+
features = ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename']
8585
included_features = []
86+
df.columns = df.columns.str.capitalize()
8687
for feature in features:
8788
if feature not in list(df.columns):
88-
if feature == 'lat' or feature == 'lon':
89-
mapping = False
90-
elif feature == 'file_url':
89+
if feature == 'Lat' or feature == 'Lon':
90+
if feature == 'Lon':
91+
if 'Long' not in list(df.columns):
92+
mapping = False
93+
else:
94+
df = df.rename(columns = {"Long": "Lon"})
95+
included_features.append('Lon')
96+
else:
97+
mapping = False
98+
elif feature == 'File_url':
9199
img_urls = False
92100
elif feature == 'Image_filename':
93101
# If 'Image_filename' missing, return missing column if 'file_url' is included.
@@ -102,10 +110,10 @@ def parse_contents(contents, filename):
102110
if mapping:
103111
try:
104112
# Check lat and lon within appropriate ranges (lat: [-90, 90], lon: [-180, 180])
105-
valid_lat = df['lat'].astype(float).between(-90, 90)
106-
df.loc[~valid_lat, 'lat'] = 'unknown'
107-
valid_lon = df['lon'].astype(float).between(-180, 180)
108-
df.loc[~valid_lon, 'lon'] = 'unknown'
113+
valid_lat = df['Lat'].astype(float).between(-90, 90)
114+
df.loc[~valid_lat, 'Lat'] = 'unknown'
115+
valid_lon = df['Lon'].astype(float).between(-180, 180)
116+
df.loc[~valid_lon, 'Lon'] = 'unknown'
109117
except ValueError as e:
110118
print(e)
111119
return json.dumps({'error': {'mapping': str(e)}})

test_data/HCGSD_test_latLong.csv

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
NHM_Specimen,Image_filename,View,Species,Subspecies,Sex,addit_taxa_info,type_stat,hybrid_stat,in_reduced,locality,lat,long,speciesdesig,file_url
2+
10429021,10429021_V_lowres.png,,erato,notabilis,,f._notabilis,,subspecies synonym,1,,-1.583333333,-77.75,e. notabilis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
3+
10428972,10428972_V_lowres.png,ventral,erato,petiverana,male,petiverana,,valid subspecies,1,Songolica (= Zongolica) MEX VC,18.66666667,-96.98333333,e. petiverana,
4+
10429172,,ventral,,petiverana,male,petiverana,,valid subspecies,1,San Ramon NIC ZE,89,-84.68333333,e. petiverana,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
5+
10428595,10428595_D_lowres.png,dorsal,erato,phyllis,male,f._phyllis,,subspecies synonym,1,Resistencia ARG CH,-27.45,-58.98333333,e. phyllis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/
6+
10428140,10428140_V_lowres.png,ventral,,plesseni,male,plesseni,,valid subspecies,1,Banos ECD TU,-1.4,-74,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
7+
10428250,10428250_V_lowres.png,ventral,melpomene,,male,ab._rubra,,subspecies synonym,1,Caradoc (Hda) PER CU,-13.36666667,-70.95,m. schunkei,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
8+
10427979,,dorsal,melpomene,rosina_S,male,rosina_S,,valid subspecies,1,Turrialba CRI CA,9.883333333,-83.63333333,m. rosina,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/
9+
10428803,10428803_D_lowres.png,dorsal,erato,guarica,female,guarica,,valid subspecies,1,Fusagasuga COL CN,4.35,-74.36666667,e. guarica,
10+
10428169,10428169_V_lowres.png,ventral,melpomene,plesseni,male,f._pura,ST,subspecies synonym,1,Canelos ECD PA,-1.583333333,73,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
11+
10428321,10428321_D_lowres.png,,melpomene,nanna,male,nanna,ST,valid subspecies,1,Espirito Santo BRA ES,-20.33333333,-40.28333333,m. nanna,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/

tests/components/test_divs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def test_get_img_div():
2929
'Subspecies': ['subspecies1', 'subspecies2', 'subspecies4'],
3030
'View': ['ventral', 'ventral', 'dorsal'],
3131
'Sex': ['male', 'female', 'female'],
32-
'hybrid_stat': ['subspecies synonym', 'valid subspecies', 'subspecies synonym']
32+
'Hybrid_stat': ['subspecies synonym', 'valid subspecies', 'subspecies synonym']
3333
}
3434
df = pd.DataFrame(data = data)
3535

tests/components/test_graphs.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44

55
# Define test data
66
df = pd.read_csv("test_data/HCGSD_full_testNA.csv")
7-
included_features = ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename']
7+
# Update columns since not running through parse
8+
df.columns = df.columns.str.capitalize()
9+
included_features = ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename']
810
processed_df, cat_list = get_data(df, True, included_features)
911

1012
def test_make_hist_plot():

tests/components/test_query.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,23 @@ def test_get_data(self):
2323
data = {
2424
'Species': ['melpomene', 'melpomene', 'erato', 'melpomene', 'erato', 'species3'],
2525
'Subspecies': ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', None],
26-
'lat': [-13.43, 5.25, 5.25, 9.9, 5.25, 9.9],
27-
'lon': [-70.38, -55.25, -55.25, -83.73, -55.25, -55.25]
26+
'Lat': [-13.43, 5.25, 5.25, 9.9, 5.25, 9.9],
27+
'Lon': [-70.38, -55.25, -55.25, -83.73, -55.25, -55.25]
2828
}
2929
cat_list = [{'label': 'Species', 'value': 'Species'},
3030
{'label': 'Subspecies', 'value': 'Subspecies'},
3131
{'label':'View', 'value': 'View'},
3232
{'label': 'Sex', 'value': 'Sex'},
33-
{'label': 'Hybrid Status', 'value':'hybrid_stat'},
34-
{'label': 'Locality', 'value': 'locality'}]
35-
features = ['Species', 'Subspecies', 'lat', 'lon']
33+
{'label': 'Hybrid Status', 'value':'Hybrid_stat'},
34+
{'label': 'Locality', 'value': 'Locality'}]
35+
features = ['Species', 'Subspecies', 'Lat', 'Lon']
3636
locality = ['-13.43|-70.38', '5.25|-55.25', '5.25|-55.25', '9.9|-83.73','5.25|-55.25', '9.9|-55.25']
3737

3838
# Test with mapping = True (location data)
3939
df = pd.DataFrame(data = data)
4040
result_df, result_list = get_data(df, True, features)
4141
self.assertEqual(result_df['lat-lon'].tolist(), locality)
42-
self.assertEqual(result_df['locality'].tolist(), locality)
42+
self.assertEqual(result_df['Locality'].tolist(), locality)
4343
self.assertEqual(result_df["Samples_at_locality"].tolist(), [1,3,3,1,3,1])
4444
self.assertEqual(result_df["Species_at_locality"].tolist(), ['melpomene', 'melpomene, erato', 'melpomene, erato', 'melpomene', 'melpomene, erato', 'species3'])
4545
self.assertEqual(result_df["Subspecies_at_locality"].tolist(), ['schunkei', 'nanna, erato, guarica', 'nanna, erato, guarica', 'rosina_N', 'nanna, erato, guarica', 'unknown'])
@@ -48,8 +48,7 @@ def test_get_data(self):
4848
# Test with mapping = False (no location data)
4949
df2 = pd.DataFrame(data = {key: data[key] for key in ['Species', 'Subspecies']})
5050
result_df2, result2_list = get_data(df2, False, features[:2])
51-
#self.assertEqual('locality' not in result_df2.columns, True)
52-
self.assertEqual(result_df2['locality'].tolist(), ['unknown' for i in range(len(locality))])
51+
self.assertEqual(result_df2['Locality'].tolist(), ['unknown' for i in range(len(locality))])
5352
self.assertEqual(result_df2["Species"].tolist(), ['melpomene', 'melpomene', 'erato', 'melpomene', 'erato', 'species3'])
5453
self.assertEqual(result_df2["Subspecies"].tolist(), ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', 'unknown'])
5554
self.assertEqual(result2_list, cat_list)
@@ -62,7 +61,7 @@ def test_get_filenames(self):
6261
'Subspecies': ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', 'subspecies6', 'subspecies6'],
6362
'View': ['ventral', 'ventral', 'ventral', 'dorsal', 'dorsal', 'ventral', 'dorsal'],
6463
'Sex': ['male', 'female', 'female', 'male', 'female', 'male', 'female'],
65-
'hybrid_stat': ['subspecies synonym',
64+
'Hybrid_stat': ['subspecies synonym',
6665
'valid subspecies',
6766
'subspecies synonym',
6867
'valid subspecies',

tests/test_app_callbacks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from dashboard import update_dist_view, update_dist_plot, update_pie_plot, set_subspecies_options, update_display
44

55
# Define test data
6-
data = {'processed_df': '{"columns":["Species","Subspecies","View","Sex","hybrid_stat","lat","lon","lat-lon","Samples_at_locality","Species_at_locality","Subspecies_at_locality"],"index":[0,1,2,3,4,5,6,7,8,9],"data":[["erato","notabilis","unknown","unknown","subspecies synonym",-1.583333333,-77.75,"-1.583333333|-77.75",1,"erato","notabilis"],["erato","petiverana","ventral","male","valid subspecies",18.66666667,-96.98333333,"18.66666667|-96.98333333",1,"erato","petiverana"],["unknown","petiverana","ventral","male","valid subspecies","unknown",-84.68333333,"unknown|-84.68333333",1,"unknown","petiverana"],["erato","phyllis","dorsal","male","subspecies synonym",-27.45,-58.98333333,"-27.45|-58.98333333",1,"erato","phyllis"],["unknown","plesseni","ventral","male","valid subspecies",-1.4,"unknown","-1.4|unknown",1,"unknown","plesseni"],["melpomene","unknown","ventral","male","subspecies synonym",-13.36666667,-70.95,"-13.36666667|-70.95",1,"melpomene","unknown"],["melpomene","rosina_S","dorsal","male","valid subspecies",9.883333333,-83.63333333,"9.883333333|-83.63333333",1,"melpomene","rosina_S"],["erato","guarica","dorsal","female","valid subspecies",4.35,-74.36666667,"4.35|-74.36666667",1,"erato","guarica"],["melpomene","plesseni","ventral","male","subspecies synonym",-1.583333333,"unknown","-1.583333333|unknown",1,"melpomene","plesseni"],["melpomene","nanna","unknown","male","valid subspecies",-20.33333333,-40.28333333,"-20.33333333|-40.28333333",1,"melpomene","nanna"]]}',
6+
data = {'processed_df': '{"columns":["Species","Subspecies","View","Sex","Hybrid_stat","Lat","Lon","lat-lon","Samples_at_locality","Species_at_locality","Subspecies_at_locality"],"index":[0,1,2,3,4,5,6,7,8,9],"data":[["erato","notabilis","unknown","unknown","subspecies synonym",-1.583333333,-77.75,"-1.583333333|-77.75",1,"erato","notabilis"],["erato","petiverana","ventral","male","valid subspecies",18.66666667,-96.98333333,"18.66666667|-96.98333333",1,"erato","petiverana"],["unknown","petiverana","ventral","male","valid subspecies","unknown",-84.68333333,"unknown|-84.68333333",1,"unknown","petiverana"],["erato","phyllis","dorsal","male","subspecies synonym",-27.45,-58.98333333,"-27.45|-58.98333333",1,"erato","phyllis"],["unknown","plesseni","ventral","male","valid subspecies",-1.4,"unknown","-1.4|unknown",1,"unknown","plesseni"],["melpomene","unknown","ventral","male","subspecies synonym",-13.36666667,-70.95,"-13.36666667|-70.95",1,"melpomene","unknown"],["melpomene","rosina_S","dorsal","male","valid subspecies",9.883333333,-83.63333333,"9.883333333|-83.63333333",1,"melpomene","rosina_S"],["erato","guarica","dorsal","female","valid subspecies",4.35,-74.36666667,"4.35|-74.36666667",1,"erato","guarica"],["melpomene","plesseni","ventral","male","subspecies synonym",-1.583333333,"unknown","-1.583333333|unknown",1,"melpomene","plesseni"],["melpomene","nanna","unknown","male","valid subspecies",-20.33333333,-40.28333333,"-20.33333333|-40.28333333",1,"melpomene","nanna"]]}',
77
'all_species': {'Erato': ['Any-Erato', 'notabilis', 'petiverana', 'phyllis', 'guarica'], 'Unknown': ['Any-Unknown', 'petiverana', 'plesseni'], 'Melpomene': ['Any-Melpomene', 'unknown', 'rosina_S', 'plesseni', 'nanna'], 'Any': ['Any', 'notabilis', 'petiverana', 'phyllis', 'plesseni', 'unknown', 'rosina_S', 'guarica', 'nanna']},
88
'mapping': True,
99
'images': True}

0 commit comments

Comments
 (0)