File size: 842 Bytes
58ddd29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# model_loader.py
from langchain_community.chat_models import ChatLlamaCpp
from huggingface_hub import hf_hub_download

# Module-level cache for the singleton ChatLlamaCpp model; populated
# lazily by get_model() on first use so importing this module stays cheap.
_llm_instance = None

def get_model(callbacks=None):
    """Return a lazily-initialized, module-wide singleton ChatLlamaCpp model.

    On the first call this downloads the GGUF weights from the Hugging Face
    Hub (hf_hub_download caches to the local HF cache, so the download only
    happens once per machine) and constructs the model; subsequent calls
    return the same cached instance.

    Args:
        callbacks: Optional list of LangChain callback handlers to attach to
            the model. NOTE: because the instance is cached, callbacks are
            only honored on the *first* call — later calls return the
            already-built instance unchanged.

    Returns:
        The shared ChatLlamaCpp instance.
    """
    global _llm_instance
    if _llm_instance is None:
        print("Downloading model from Hugging Face...")
        model_path = hf_hub_download(
            repo_id="junaid17/qwen2.5-coder-3b-gguf",
            filename="qwen2.5-coder-3b-instruct.Q4_K_M.gguf",
        )
        print("Loading ChatLlamaCpp model for the first time...")
        _llm_instance = ChatLlamaCpp(
            model_path=model_path,
            temperature=0.7,
            max_tokens=1000,
            n_ctx=4096,
            n_batch=512,
            n_threads=8,
            n_gpu_layers=0,  # CPU-only inference
            verbose=False,
            # Fix: `callbacks` was accepted but never forwarded, so caller-
            # supplied handlers (streaming, logging, ...) were silently ignored.
            callbacks=callbacks,
        )
        print("Model loaded successfully!")
    return _llm_instance