@@ -811,6 +811,7 @@ enum e_model {
     MODEL_7B,
     MODEL_13B,
     MODEL_30B,
+    MODEL_40B,
     MODEL_65B,
     MODEL_70B,
 };
@@ -1489,9 +1490,10 @@ static const char * llama_model_type_name(e_model type) {
         case MODEL_7B:  return "7B";
         case MODEL_13B: return "13B";
         case MODEL_30B: return "30B";
+        case MODEL_40B: return "40B";
         case MODEL_65B: return "65B";
         case MODEL_70B: return "70B";
-        default: GGML_ASSERT(false);
+        default: return "?B";
     }
 }
 
@@ -1555,40 +1557,29 @@ static void llm_load_hparams(
         case LLM_ARCH_LLAMA:
             {
                 GGUF_GET_KEY(ctx, hparams.f_norm_rms_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS));
+
+                switch (hparams.n_layer) {
+                    case 26: model.type = e_model::MODEL_3B; break;
+                    case 32: model.type = e_model::MODEL_7B; break;
+                    case 40: model.type = e_model::MODEL_13B; break;
+                    case 60: model.type = e_model::MODEL_30B; break;
+                    case 80: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_65B : e_model::MODEL_70B; break;
+                    default: model.type = e_model::MODEL_UNKNOWN;
+                }
             } break;
         case LLM_ARCH_FALCON:
             {
                 GGUF_GET_KEY(ctx, hparams.f_norm_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS));
+
+                switch (hparams.n_layer) {
+                    case 32: model.type = e_model::MODEL_7B; break;
+                    case 60: model.type = e_model::MODEL_40B; break;
+                    default: model.type = e_model::MODEL_UNKNOWN;
+                }
             } break;
         default: (void)0;
     };
 
-    // TODO: generalize to non-LLaMA models
-    switch (hparams.n_layer) {
-        case 26: model.type = e_model::MODEL_3B; break;
-        case 32: model.type = e_model::MODEL_7B; break;
-        case 40: model.type = e_model::MODEL_13B; break;
-        case 60: model.type = e_model::MODEL_30B; break;
-        case 80: model.type = e_model::MODEL_65B; break;
-        default:
-            {
-                if (hparams.n_layer < 32) {
-                    model.type = e_model::MODEL_7B;
-                }
-            } break;
-    }
-
-    // LLaMAv2
-    // TODO: probably not needed
-    {
-        const auto n_gqa = hparams.n_gqa();
-
-        if (model.type == e_model::MODEL_65B && n_gqa == 8) {
-            LLAMA_LOG_WARN("%s: assuming 70B model based on GQA == %d\n", __func__, n_gqa);
-            model.type = e_model::MODEL_70B;
-        }
-    }
-
     model.ftype = ml.ftype;
 
     hparams.n_ctx = n_ctx;
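Note on the 80-layer case in the LLaMA branch above: LLaMA-65B and LLaMA-2 70B both have 80 layers, so instead of the removed GQA warning heuristic the model type is now chosen by comparing n_head to n_head_kv. Below is a minimal sketch of that check, assuming the published head counts (64/64 for 65B, 64 query heads / 8 KV heads for 70B); classify_80_layer is a hypothetical helper, not part of this patch.

#include <cstdint>
#include <cstdio>

// Hypothetical helper mirroring the 80-layer ternary above: equal head counts
// mean full multi-head attention (65B), fewer KV heads mean grouped-query
// attention (70B).
static const char * classify_80_layer(uint32_t n_head, uint32_t n_head_kv) {
    return n_head == n_head_kv ? "MODEL_65B" : "MODEL_70B";
}

int main() {
    printf("n_head=64 n_head_kv=64 -> %s\n", classify_80_layer(64, 64)); // MODEL_65B
    printf("n_head=64 n_head_kv= 8 -> %s\n", classify_80_layer(64,  8)); // MODEL_70B
    return 0;
}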
@@ -5015,7 +5006,10 @@ int llama_model_n_embd(const struct llama_model * model) {
 }
 
 int llama_model_type(const struct llama_model * model, char * buf, size_t buf_size) {
-    return snprintf(buf, buf_size, "LLaMA %s %s", llama_model_type_name(model->type), llama_model_ftype_name(model->ftype).c_str());
+    return snprintf(buf, buf_size, "%s %s %s",
+            model->name.c_str(),
+            llama_model_type_name(model->type),
+            llama_model_ftype_name(model->ftype).c_str());
 }
 
 int llama_model_quantize(
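For reference, a minimal usage sketch of the updated llama_model_type, which now prefixes the size and ftype with the model name from the GGUF metadata, so a Falcon model is no longer labeled "LLaMA". It assumes a struct llama_model * obtained elsewhere (e.g. from the existing loader API); print_model_desc is a hypothetical helper, not part of this patch.

#include <cstdio>

#include "llama.h"

// Hypothetical helper: write the "<name> <type> <ftype>" description produced
// by the patched llama_model_type() into a local buffer and print it.
static void print_model_desc(const struct llama_model * model) {
    char desc[128];
    llama_model_type(model, desc, sizeof(desc));
    printf("%s\n", desc);
}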