@@ -151,8 +151,11 @@ class Params:
     n_head_kv:  int
     f_norm_eps: float

+    rope_scaling_type: gguf.RopeScalingType | None = None
     f_rope_freq_base: float | None = None
     f_rope_scale: float | None = None
+    n_orig_ctx: int | None = None
+    rope_finetuned: bool | None = None

     ftype: GGMLFileType | None = None

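The new optional fields only get values when a model's `config.json` declares rope scaling; for every other checkpoint they stay `None`. Below is a minimal sketch of that behavior using a stand-in dataclass and enum rather than the real `Params` and `gguf.RopeScalingType` (only the `LINEAR` and `YARN` members are taken from this diff; the field subset and numbers are illustrative):

```python
from __future__ import annotations

from dataclasses import dataclass
from enum import Enum


class RopeScalingType(Enum):
    # Stand-in for gguf.RopeScalingType; only the members used in this diff.
    LINEAR = "linear"
    YARN = "yarn"


@dataclass
class ParamsSketch:
    # Reduced stand-in for Params: the context length plus the new fields.
    n_ctx: int
    rope_scaling_type: RopeScalingType | None = None
    f_rope_scale: float | None = None
    n_orig_ctx: int | None = None
    rope_finetuned: bool | None = None


# A plain checkpoint leaves every rope-scaling field unset ...
base = ParamsSketch(n_ctx=4096)
# ... while a YaRN-extended model (hypothetical numbers) fills them all in.
yarn = ParamsSketch(n_ctx=32768, rope_scaling_type=RopeScalingType.YARN,
                    f_rope_scale=8.0, n_orig_ctx=4096, rope_finetuned=True)

assert base.rope_scaling_type is None
assert yarn.n_orig_ctx == 4096 and yarn.rope_finetuned
```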
@@ -198,20 +201,20 @@ def guessed(model: LazyModel) -> Params:
     def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
         config = json.load(open(config_path))

-        n_vocab          = config["vocab_size"]
-        n_embd           = config["hidden_size"]
-        n_layer          = config["num_hidden_layers"]
-        n_ff             = config["intermediate_size"]
-        n_head           = config["num_attention_heads"]
-        n_head_kv        = config["num_key_value_heads"] if "num_key_value_heads" in config else n_head
-        f_norm_eps       = config["rms_norm_eps"]
-        f_rope_freq_base = config["rope_theta"] if "rope_theta" in config else None
-
+        rope_scaling_type = f_rope_scale = n_orig_ctx = rope_finetuned = None
         rope_scaling = config.get("rope_scaling")
-        if isinstance(rope_scaling, dict) and rope_scaling.get("type") == "linear":
-            f_rope_scale = config["rope_scaling"].get("factor")
-        else:
-            f_rope_scale = None
+
+        if rope_scaling is not None and (typ := rope_scaling.get("type")):
+            rope_factor = rope_scaling.get("factor")
+            f_rope_scale = rope_factor
+            if typ == "linear":
+                rope_scaling_type = gguf.RopeScalingType.LINEAR
+            elif typ == "yarn":
+                rope_scaling_type = gguf.RopeScalingType.YARN
+                n_orig_ctx = rope_scaling['original_max_position_embeddings']
+                rope_finetuned = rope_scaling['finetuned']
+            else:
+                raise NotImplementedError(f'Unknown rope scaling type: {typ}')

         if "max_sequence_length" in config:
             n_ctx = config["max_sequence_length"]
@@ -222,16 +225,19 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
                             "Suggestion: provide 'config.json' of the model in the same directory containing model files.")

         return Params(
-            n_vocab          = n_vocab,
-            n_embd           = n_embd,
-            n_layer          = n_layer,
-            n_ctx            = n_ctx,
-            n_ff             = n_ff,
-            n_head           = n_head,
-            n_head_kv        = n_head_kv,
-            f_norm_eps       = f_norm_eps,
-            f_rope_freq_base = f_rope_freq_base,
-            f_rope_scale     = f_rope_scale,
+            n_vocab           = config["vocab_size"],
+            n_embd            = config["hidden_size"],
+            n_layer           = config["num_hidden_layers"],
+            n_ctx             = n_ctx,
+            n_ff              = config["intermediate_size"],
+            n_head            = (n_head := config["num_attention_heads"]),
+            n_head_kv         = config.get("num_key_value_heads", n_head),
+            f_norm_eps        = config["rms_norm_eps"],
+            f_rope_freq_base  = config.get("rope_theta"),
+            rope_scaling_type = rope_scaling_type,
+            f_rope_scale      = f_rope_scale,
+            n_orig_ctx        = n_orig_ctx,
+            rope_finetuned    = rope_finetuned,
         )

     # LLaMA v2 70B params.json
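To make the new branch concrete, here is the same parsing logic run standalone against a hypothetical Hugging Face `config.json` excerpt. The key names (`type`, `factor`, `original_max_position_embeddings`, `finetuned`) are the ones read above; the numeric values are made up, and plain strings stand in for the `gguf.RopeScalingType` members:

```python
# Hypothetical config.json excerpt for a YaRN-extended model, reduced to the
# keys the new code reads; the numbers are illustrative.
config = {
    "rope_theta": 10000.0,
    "rope_scaling": {
        "type": "yarn",
        "factor": 8.0,
        "original_max_position_embeddings": 4096,
        "finetuned": True,
    },
}

rope_scaling_type = f_rope_scale = n_orig_ctx = rope_finetuned = None
rope_scaling = config.get("rope_scaling")

# Same walk as loadHFTransformerJson above, with strings instead of the enum.
if rope_scaling is not None and (typ := rope_scaling.get("type")):
    f_rope_scale = rope_scaling.get("factor")
    if typ == "linear":
        rope_scaling_type = "linear"
    elif typ == "yarn":
        rope_scaling_type = "yarn"
        n_orig_ctx = rope_scaling["original_max_position_embeddings"]
        rope_finetuned = rope_scaling["finetuned"]
    else:
        raise NotImplementedError(f"Unknown rope scaling type: {typ}")

print(rope_scaling_type, f_rope_scale, n_orig_ctx, rope_finetuned)
# yarn 8.0 4096 True
```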
@@ -240,17 +246,8 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
     def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
         config = json.load(open(config_path))

-        n_vocab          = config["vocab_size"] if "vocab_size" in config else -1
-        n_embd           = config["dim"]
-        n_layer          = config["n_layers"]
-        n_ff             = -1
-        n_head           = config["n_heads"]
-        n_head_kv        = config["n_kv_heads"] if "n_kv_heads" in config else n_head
-        f_norm_eps       = config["norm_eps"]
-        f_rope_freq_base = config["rope_theta"] if "rope_theta" in config else None
-
         # hack to determine LLaMA v1 vs v2 vs CodeLlama
-        if f_rope_freq_base == 1000000:
+        if config.get("rope_theta") == 1000000:
             # CodeLlama
             n_ctx = 16384
         elif config["norm_eps"] == 1e-05:
@@ -260,22 +257,16 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
             # LLaMA v1
             n_ctx = 2048

-        if n_vocab == -1:
-            n_vocab = model["tok_embeddings.weight"].shape[0]
-
-        if n_ff == -1:
-            n_ff = model["layers.0.feed_forward.w1.weight"].shape[0]
-
         return Params(
-            n_vocab          = n_vocab,
-            n_embd           = n_embd,
-            n_layer          = n_layer,
+            n_vocab          = config.get("vocab_size", model["tok_embeddings.weight"].shape[0]),
+            n_embd           = config["dim"],
+            n_layer          = config["n_layers"],
             n_ctx            = n_ctx,
-            n_ff             = n_ff,
-            n_head           = n_head,
-            n_head_kv        = n_head_kv,
-            f_norm_eps       = f_norm_eps,
-            f_rope_freq_base = f_rope_freq_base,
+            n_ff             = model["layers.0.feed_forward.w1.weight"].shape[0],
+            n_head           = (n_head := config["n_heads"]),
+            n_head_kv        = config.get("n_kv_heads", n_head),
+            f_norm_eps       = config["norm_eps"],
+            f_rope_freq_base = config.get("rope_theta"),
         )

     @staticmethod
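For reference, the context-length guess that both hunks above leave in place can be read as a small helper. The 16384 and 2048 branches are visible in the diff; the 4096 value for the LLaMA v2 branch is not shown here and is an assumption about the elided lines:

```python
def guess_n_ctx(config: dict) -> int:
    # Mirrors the "hack to determine LLaMA v1 vs v2 vs CodeLlama" above.
    if config.get("rope_theta") == 1000000:
        # CodeLlama
        return 16384
    elif config["norm_eps"] == 1e-05:
        # LLaMA v2 (value assumed; this branch is elided from the diff)
        return 4096
    else:
        # LLaMA v1
        return 2048


assert guess_n_ctx({"norm_eps": 1e-06}) == 2048
assert guess_n_ctx({"norm_eps": 1e-05}) == 4096
assert guess_n_ctx({"norm_eps": 1e-05, "rope_theta": 1000000}) == 16384
```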
@@ -831,8 +822,16 @@ def add_meta_arch(self, params: Params) -> None:
         if params.f_rope_freq_base is not None:
             self.gguf.add_rope_freq_base(params.f_rope_freq_base)

-        if params.f_rope_scale is not None:
-            self.gguf.add_rope_scale_linear(params.f_rope_scale)
+        if params.rope_scaling_type:
+            assert params.f_rope_scale is not None
+            self.gguf.add_rope_scaling_type(params.rope_scaling_type)
+            self.gguf.add_rope_scaling_factor(params.f_rope_scale)
+
+            if params.n_orig_ctx is not None:
+                self.gguf.add_rope_scaling_orig_ctx_len(params.n_orig_ctx)
+
+            if params.rope_finetuned is not None:
+                self.gguf.add_rope_scaling_finetuned(params.rope_finetuned)

         if params.ftype is not None:
             self.gguf.add_file_type(params.ftype)
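The net effect of the writer changes is that rope-scaling metadata is only emitted when a scaling type was detected, and the original-context/finetuned keys only when the HF config supplied them. A self-contained sketch of those guards, using a recording stub instead of `gguf.GGUFWriter` (the `add_rope_*` method names are taken from the diff; `RecordingWriter` and the sample values are illustrative):

```python
from types import SimpleNamespace


class RecordingWriter:
    """Stub that records writer calls so the guards can be exercised without
    the gguf package (stand-in for gguf.GGUFWriter)."""
    def __init__(self) -> None:
        self.calls = []  # list of (method_name, value) tuples

    def __getattr__(self, name):
        return lambda value: self.calls.append((name, value))


def write_rope_meta(writer, params) -> None:
    # Same guards as add_meta_arch above.
    if params.f_rope_freq_base is not None:
        writer.add_rope_freq_base(params.f_rope_freq_base)

    if params.rope_scaling_type:
        # A scaling type without a factor would be malformed metadata.
        assert params.f_rope_scale is not None
        writer.add_rope_scaling_type(params.rope_scaling_type)
        writer.add_rope_scaling_factor(params.f_rope_scale)

        if params.n_orig_ctx is not None:
            writer.add_rope_scaling_orig_ctx_len(params.n_orig_ctx)
        if params.rope_finetuned is not None:
            writer.add_rope_scaling_finetuned(params.rope_finetuned)


# Hypothetical YaRN-extended model vs. a model with no rope metadata at all.
yarn = SimpleNamespace(f_rope_freq_base=10000.0, rope_scaling_type="yarn",
                       f_rope_scale=8.0, n_orig_ctx=4096, rope_finetuned=True)
plain = SimpleNamespace(f_rope_freq_base=None, rope_scaling_type=None,
                        f_rope_scale=None, n_orig_ctx=None, rope_finetuned=None)

w = RecordingWriter()
write_rope_meta(w, yarn)
assert [name for name, _ in w.calls] == [
    "add_rope_freq_base", "add_rope_scaling_type", "add_rope_scaling_factor",
    "add_rope_scaling_orig_ctx_len", "add_rope_scaling_finetuned",
]

w = RecordingWriter()
write_rope_meta(w, plain)
assert w.calls == []
```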