
Which 7B model are you using? 4-bit, I assume?


This one specifically is the Q8_0 quant rather than 4-bit:

TheBloke / Mistral-7B-Instruct-v0.2-GGUF / mistral-7b-instruct-v0.2.Q8_0.gguf

and I'm running it in LM Studio with this config:

  {
    "name": "Exported from LM Studio on 21.12.2023, 14:57:43",
    "load_params": {
      "n_ctx": 32768,
      "n_batch": 512,
      "rope_freq_base": 1000000,
      "rope_freq_scale": 1,
      "n_gpu_layers": 100,
      "use_mlock": true,
      "main_gpu": 0,
      "tensor_split": [
        0
      ],
      "seed": -1,
      "f16_kv": true,
      "use_mmap": true
    },
    "inference_params": {
      "n_threads": 4,
      "n_predict": -1,
      "top_k": 40,
      "top_p": 0.95,
      "temp": 0.2,
      "repeat_penalty": 1.1,
      "input_prefix": "[INST]",
      "input_suffix": "[/INST]",
      "antiprompt": [
        "[INST]"
      ],
      "pre_prompt": "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
      "pre_prompt_suffix": "",
      "pre_prompt_prefix": "",
      "seed": -1,
      "tfs_z": 1,
      "typical_p": 1,
      "repeat_last_n": 64,
      "frequency_penalty": 0,
      "presence_penalty": 0,
      "n_keep": 0,
      "logit_bias": {},
      "mirostat": 0,
      "mirostat_tau": 5,
      "mirostat_eta": 0.1,
      "memory_f16": true,
      "multiline_input": false,
      "penalize_nl": true
    }
  }
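
For anyone reproducing this outside LM Studio: the same settings map fairly directly onto llama-cpp-python. This is only a rough, untested sketch — the library choice, the model path, and the example instruction are my own assumptions; the numeric parameters and the [INST]/[/INST] wrapping mirror the JSON above.

  # Approximate llama-cpp-python equivalent of the LM Studio config above.
  # Assumed (not from the original post): llama-cpp-python is installed
  # and the GGUF file lives at the path below.
  from llama_cpp import Llama

  llm = Llama(
      model_path="./models/mistral-7b-instruct-v0.2.Q8_0.gguf",  # hypothetical path
      n_ctx=32768,
      n_batch=512,
      n_gpu_layers=100,         # offload all layers to the GPU
      rope_freq_base=1000000,
      rope_freq_scale=1.0,
      use_mlock=True,
      use_mmap=True,
      seed=-1,                  # random seed each run
      n_threads=4,
  )

  # Prompt assembly roughly following pre_prompt + input_prefix/input_suffix
  # (LM Studio's exact concatenation may differ slightly).
  pre_prompt = ("Below is an instruction that describes a task. "
                "Write a response that appropriately completes the request.")
  instruction = "Summarize the plot of Hamlet in two sentences."  # example only
  prompt = f"{pre_prompt}\n[INST]{instruction}[/INST]"

  out = llm(
      prompt,
      max_tokens=-1,            # n_predict -1: generate until EOS or context limit
      temperature=0.2,
      top_k=40,
      top_p=0.95,
      repeat_penalty=1.1,
      stop=["[INST]"],          # antiprompt
  )
  print(out["choices"][0]["text"])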



