19
19
20
20
// Per-entry accumulator stored in IMatrixCollector's stats map (one per tensor name).
struct Stats {
    // Running per-element sums; the collector adds x[j]*x[j] into each slot.
    std::vector<float> values;
    // Per-element tally of contributions accumulated into the matching `values` slot.
    // Sized together with `values` so the two can be indexed in lockstep.
    std::vector<int> counts;
    // Number of collection calls recorded for this entry.
    int ncall = 0;
};
24
25
@@ -121,12 +122,10 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
121
122
auto & e = m_stats[wname];
122
123
123
124
++e.ncall ;
124
- // NOTE: since we select top-k experts, the number of calls for the expert tensors will be k times larger
125
- // using the following line, we can correct for that if needed by replacing the line above with:
126
- // if (idx == t->src[0]->ne[0] - 1) ++e.ncall;
127
125
128
126
if (e.values .empty ()) {
129
127
e.values .resize (src1->ne [0 ]*n_as, 0 );
128
+ e.counts .resize (src1->ne [0 ]*n_as, 0 );
130
129
}
131
130
else if (e.values .size () != (size_t )src1->ne [0 ]*n_as) {
132
131
fprintf (stderr, " Oops: inconsistent size for %s (%d vs %d)\n " , wname.c_str (), (int )e.values .size (), (int )src1->ne [0 ]*n_as);
@@ -153,6 +152,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
153
152
154
153
for (int j = 0 ; j < (int )src1->ne [0 ]; ++j) {
155
154
e.values [e_start + j] += x[j]*x[j];
155
+ e.counts [e_start + j]++;
156
156
}
157
157
}
158
158
}
@@ -170,6 +170,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
170
170
auto & e = m_stats[wname];
171
171
if (e.values .empty ()) {
172
172
e.values .resize (src1->ne [0 ], 0 );
173
+ e.counts .resize (src1->ne [0 ], 0 );
173
174
}
174
175
else if (e.values .size () != (size_t )src1->ne [0 ]) {
175
176
fprintf (stderr, " Oops: inconsistent size for %s (%d vs %d)\n " , wname.c_str (), (int )e.values .size (), (int )src1->ne [0 ]);
@@ -183,6 +184,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
183
184
const float * x = data + row * src1->ne [0 ];
184
185
for (int j = 0 ; j < (int )src1->ne [0 ]; ++j) {
185
186
e.values [j] += x[j]*x[j];
187
+ e.counts [j]++;
186
188
}
187
189
}
188
190
if (e.ncall > m_last_call) {
@@ -222,7 +224,13 @@ void IMatrixCollector::save_imatrix(const char * fname, const char * dataset) co
222
224
out.write ((const char *) &p.second .ncall , sizeof (p.second .ncall ));
223
225
int nval = p.second .values .size ();
224
226
out.write ((const char *) &nval, sizeof (nval));
225
- if (nval > 0 ) out.write ((const char *) p.second .values .data (), nval * sizeof (float ));
227
+ if (nval > 0 ) {
228
+ std::vector<float > tmp (nval);
229
+ for (int i = 0 ; i < nval; i++) {
230
+ tmp[i] = (p.second .values [i] / static_cast <float >(p.second .counts [i])) * static_cast <float >(p.second .ncall );
231
+ }
232
+ out.write ((const char *)tmp.data (), nval*sizeof (float ));
233
+ }
226
234
}
227
235
228
236
// Write the number of calls the matrix was computed with
@@ -270,14 +278,28 @@ bool IMatrixCollector::load_imatrix(const char * imatrix_file, std::unordered_ma
270
278
imatrix_data = {};
271
279
return false ;
272
280
}
273
- e.values .resize (nval);
274
- in.read ((char *)e.values .data (), nval*sizeof (float ));
281
+
282
+ // When re-called from load_imatrix() with add set, this will already be created.
283
+ if (e.values .empty ()) {
284
+ e.values .resize (nval, 0 );
285
+ e.counts .resize (nval, 0 );
286
+ }
287
+
288
+ std::vector<float > tmp (nval);
289
+ in.read ((char *)tmp.data (), nval*sizeof (float ));
275
290
if (in.fail ()) {
276
291
printf (" %s: failed reading data for entry %d\n " ,__func__,i);
277
292
imatrix_data = {};
278
293
return false ;
279
294
}
280
- e.ncall = ncall;
295
+
296
+ // Recreate the state as expected by save_imatrix(), and correct for weighted sum.
297
+ for (int i = 0 ; i < nval; i++) {
298
+ e.values [i] += tmp[i];
299
+ e.counts [i] += ncall;
300
+ }
301
+ e.ncall += ncall;
302
+
281
303
}
282
304
return true ;
283
305
}
0 commit comments