# NOTE: extracted from a Hugging Face Space page; UI chrome ("Spaces: Running") removed.
# model_loader.py
from langchain_community.chat_models import ChatLlamaCpp
from huggingface_hub import hf_hub_download

# Module-level cache: holds the single ChatLlamaCpp instance after the first
# successful load, so repeated get_model() calls reuse one model in memory.
_llm_instance = None
def get_model(callbacks=None):
    """Return the process-wide ChatLlamaCpp instance, loading it on first use.

    On the first call this downloads the GGUF weights from the Hugging Face
    Hub (hf_hub_download caches locally, so repeat runs skip the network
    fetch) and constructs the model; the instance is stored in the
    module-level ``_llm_instance`` so later calls return it immediately.

    Args:
        callbacks: Optional LangChain callback handlers forwarded to the
            model constructor. NOTE: because the instance is cached,
            callbacks passed on later calls have no effect — only the
            first call's callbacks are attached.

    Returns:
        The shared ChatLlamaCpp chat model.
    """
    global _llm_instance
    if _llm_instance is None:
        print("Downloading model from Hugging Face...")
        model_path = hf_hub_download(
            repo_id="junaid17/qwen2.5-coder-3b-gguf",
            filename="qwen2.5-coder-3b-instruct.Q4_K_M.gguf",
        )
        print("Loading ChatLlamaCpp model for the first time...")
        _llm_instance = ChatLlamaCpp(
            model_path=model_path,
            temperature=0.7,
            max_tokens=1000,
            n_ctx=4096,
            n_batch=512,
            n_threads=8,
            n_gpu_layers=0,  # CPU-only inference
            verbose=False,
            # Fix: `callbacks` was accepted but silently ignored before.
            callbacks=callbacks,
        )
        print("Model loaded successfully!")
    return _llm_instance