@@ -378,7 +378,9 @@ def process_subset_opt(opt, subset):
378
378
379
379
elif opt == "all" :
380
380
concat_over .update (
381
- set (getattr (datasets [0 ], subset )) - set (datasets [0 ].dims )
381
+ set ().union (
382
+ * list ((set (getattr (d , subset )) - set (d .dims ) for d in datasets ))
383
+ )
382
384
)
383
385
elif opt == "minimal" :
384
386
pass
@@ -553,16 +555,35 @@ def get_indexes(name):
553
555
data = var .set_dims (dim ).values
554
556
yield PandasIndex (data , dim , coord_dtype = var .dtype )
555
557
558
+ # preserve variable order for variables in first dataset
559
+ data_var_order = list (datasets [0 ].variables )
560
+ # append additional variables to the end
561
+ data_var_order += [e for e in data_names if e not in data_var_order ]
562
+ # create concatenation index, needed for later reindexing
563
+ concat_index = list (range (sum (concat_dim_lengths )))
564
+
556
565
# stack up each variable and/or index to fill-out the dataset (in order)
557
566
# n.b. this loop preserves variable order, needed for groupby.
558
- for name in datasets [ 0 ]. variables :
567
+ for name in data_var_order :
559
568
if name in concat_over and name not in result_indexes :
560
- try :
561
- vars = ensure_common_dims ([ds [name ].variable for ds in datasets ])
562
- except KeyError :
563
- raise ValueError (f"{ name !r} is not present in all datasets." )
564
-
565
- # Try concatenate the indexes, concatenate the variables when no index
569
+ variables = []
570
+ variable_index = []
571
+ for i , ds in enumerate (datasets ):
572
+ if name in ds .variables :
573
+ variables .append (ds .variables [name ])
574
+ # add to variable index, needed for reindexing
575
+ variable_index .extend (
576
+ [sum (concat_dim_lengths [:i ]) + k for k in range (concat_dim_lengths [i ])]
577
+ )
578
+ else :
579
+ # raise if coordinate not in all datasets
580
+ if name in coord_names :
581
+ raise ValueError (
582
+ f"coordinate { name !r} not present in all datasets."
583
+ )
584
+ vars = list (ensure_common_dims (variables ))
585
+
586
+ # Try to concatenate the indexes, concatenate the variables when no index
566
587
# is found on all datasets.
567
588
indexes : list [Index ] = list (get_indexes (name ))
568
589
if indexes :
@@ -586,9 +607,28 @@ def get_indexes(name):
586
607
)
587
608
result_vars [k ] = v
588
609
else :
589
- combined_var = concat_vars (
590
- vars , dim , positions , combine_attrs = combine_attrs
591
- )
610
+ # if variable is only present in one dataset of multiple datasets,
611
+ # then do not concat
612
+ if len (variables ) == 1 and len (datasets ) > 1 :
613
+ combined_var = variables [0 ]
614
+ # only concat if variable is in multiple datasets
615
+ # or if single dataset (GH1988)
616
+ else :
617
+ combined_var = concat_vars (
618
+ vars , dim , positions , combine_attrs = combine_attrs
619
+ )
620
+ # reindex if variable is not present in all datasets
621
+ if len (variable_index ) < len (concat_index ):
622
+ try :
623
+ fill = fill_value [name ]
624
+ except (TypeError , KeyError ):
625
+ fill = fill_value
626
+ combined_var = (
627
+ DataArray (data = combined_var , name = name )
628
+ .assign_coords ({dim : variable_index })
629
+ .reindex ({dim : concat_index }, fill_value = fill )
630
+ .variable
631
+ )
592
632
result_vars [name ] = combined_var
593
633
594
634
elif name in result_vars :
0 commit comments