# Cortex-coder/scripts/load_model.py
# Provenance: uploaded to the Hugging Face Hub by junaid17
# ("Upload 6 files", commit 58ddd29, 842 bytes).
# model_loader.py
from langchain_community.chat_models import ChatLlamaCpp
from huggingface_hub import hf_hub_download
_llm_instance = None
def get_model(callbacks=None):
    """Return the process-wide ChatLlamaCpp instance, loading it lazily.

    On the first call this downloads the GGUF weights from the Hugging Face
    Hub (hf_hub_download caches the file locally, so later runs skip the
    network) and constructs the model; the instance is cached in the
    module-level ``_llm_instance`` and reused on every subsequent call.

    Args:
        callbacks: Optional list of LangChain callback handlers (e.g. for
            token streaming). NOTE: because the model is a singleton, only
            the callbacks supplied on the *first* call are attached; later
            calls with different callbacks receive the already-built model.

    Returns:
        The shared ChatLlamaCpp instance.
    """
    global _llm_instance
    if _llm_instance is None:
        print("Downloading model from Hugging Face...")
        model_path = hf_hub_download(
            repo_id="junaid17/qwen2.5-coder-3b-gguf",
            filename="qwen2.5-coder-3b-instruct.Q4_K_M.gguf",
        )
        print("Loading ChatLlamaCpp model for the first time...")
        _llm_instance = ChatLlamaCpp(
            model_path=model_path,
            temperature=0.7,
            max_tokens=1000,
            n_ctx=4096,
            n_batch=512,
            n_threads=8,
            n_gpu_layers=0,  # CPU-only inference
            verbose=False,
            # BUG FIX: `callbacks` was accepted by this function but never
            # forwarded to the model, so caller-supplied handlers were
            # silently ignored.
            callbacks=callbacks,
        )
        print("Model loaded successfully!")
    return _llm_instance