@@ -1905,10 +1905,10 @@ void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * can
1905
1905
return ;
1906
1906
}
1907
1907
1908
- const int64_t t_start_sample_us = ggml_time_us ();
1909
-
1910
1908
llama_sample_softmax (ctx, candidates);
1911
1909
1910
+ const int64_t t_start_sample_us = ggml_time_us ();
1911
+
1912
1912
// Compute the cumulative probabilities
1913
1913
float cum_sum = 0 .0f ;
1914
1914
size_t last_idx = candidates->size ;
@@ -1937,9 +1937,8 @@ void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array *
1937
1937
return ;
1938
1938
}
1939
1939
1940
- const int64_t t_start_sample_us = ggml_time_us ();
1941
-
1942
1940
llama_sample_softmax (nullptr , candidates);
1941
+ const int64_t t_start_sample_us = ggml_time_us ();
1943
1942
1944
1943
// Compute the first and second derivatives
1945
1944
std::vector<float > first_derivatives (candidates->size - 1 );
@@ -1991,11 +1990,11 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
1991
1990
return ;
1992
1991
}
1993
1992
1994
- const int64_t t_start_sample_us = ggml_time_us ();
1995
-
1996
1993
// Compute the softmax of logits and calculate entropy
1997
1994
llama_sample_softmax (nullptr , candidates);
1998
1995
1996
+ const int64_t t_start_sample_us = ggml_time_us ();
1997
+
1999
1998
float entropy = 0 .0f ;
2000
1999
for (size_t i = 0 ; i < candidates->size ; ++i) {
2001
2000
entropy += -candidates->data [i].p * logf (candidates->data [i].p );
@@ -2164,13 +2163,11 @@ llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_
2164
2163
2165
2164
if (ctx) {
2166
2165
ctx->t_sample_us += ggml_time_us () - t_start_sample_us;
2167
- ctx->n_sample ++;
2168
2166
}
2169
2167
return X;
2170
2168
}
2171
2169
2172
2170
llama_token llama_sample_token_mirostat_v2 (struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, float * mu) {
2173
- assert (ctx);
2174
2171
int64_t t_start_sample_us;
2175
2172
t_start_sample_us = ggml_time_us ();
2176
2173
@@ -2185,13 +2182,14 @@ llama_token llama_sample_token_mirostat_v2(struct llama_context * ctx, llama_tok
2185
2182
candidates->size = 1 ;
2186
2183
}
2187
2184
2185
+ if (ctx) {
2186
+ ctx->t_sample_us += ggml_time_us () - t_start_sample_us;
2187
+ }
2188
+
2188
2189
// Normalize the probabilities of the remaining words
2189
2190
llama_sample_softmax (ctx, candidates);
2190
2191
2191
2192
// Sample the next word X from the remaining words
2192
- if (ctx) {
2193
- ctx->t_sample_us += ggml_time_us () - t_start_sample_us;
2194
- }
2195
2193
llama_token X = llama_sample_token (ctx, candidates);
2196
2194
t_start_sample_us = ggml_time_us ();
2197
2195
0 commit comments