Skip to content

Commit 9f52dba

Browse files
committed
fix filter
1 parent 3937c42 commit 9f52dba

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

src/datasets/arrow_dataset.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3230,7 +3230,12 @@ def filter(
3230 3230
input_columns=input_columns,
3231 3231
desc=desc,
3232 3232
)
3233-
return Dataset(self.data, info=indices.info, indices_table=indices.data, fingerprint=new_fingerprint)
3233+
info = self.info.copy()
3234+
if info.splits:
3235+
for split_name, split_info in info.splits.items():
3236+
split_info.num_examples = indices.info.splits[split_name].num_examples
3237+
split_info.num_bytes = None
3238+
return Dataset(self.data, info=info, indices_table=indices.data, fingerprint=new_fingerprint)
3234 3239

3235 3240
@transmit_format
3236 3241
@fingerprint_transform(inplace=False, ignore_kwargs=["cache_file_name"])

0 commit comments

Comments
 (0)