# Become root; every command below assumes a root shell.
sudo su -
apt update
# build-essential/cmake: toolchain; libopenblas-dev: CPU BLAS backend;
# glslc: GLSL->SPIR-V compiler (only needed if the Vulkan backend is built).
apt install -y git git-lfs build-essential cmake python3-pip libopenblas-dev glslc
cd /opt
git clone https://github.com/ggerganov/llama.cpp
# Dedicated unprivileged account that owns the tree and runs the server.
groupadd -g 8088 llama
# No -m: /opt/llama.cpp already exists from the git clone above.
useradd -u 8088 -g llama -G users -c "llama" -d /opt/llama.cpp llama
chown -R llama:llama /opt/llama.cpp
#optional exit "root" superuser
#exit
cd llama.cpp
# -p keeps a re-run from failing (cmake -B would create the dir anyway).
mkdir -p build
# OpenBLAS-accelerated CPU build.
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
cmake --build build --config Release -j"$(nproc)"
# After the build is complete, the binaries are located in build/bin/.
# Smoke-test the build (the bare "Test via:" line was not a valid command):
/opt/llama.cpp/build/bin/llama-cli --version
/opt/llama.cpp/build/bin/llama-cli --help
#download the models:
# Use the absolute path: the cwd at this point is ambiguous, and the old
# 'cd ../models' resolved to /opt/models from /opt/llama.cpp, while the
# systemd ExecStart expects the model under /opt/llama.cpp/models/.
cd /opt/llama.cpp/models
mkdir -p Phi-3-mini-4k-instruct-gguf
cd Phi-3-mini-4k-instruct-gguf
wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf
# Re-own the whole tree so the 'llama' service user can read the model.
chown -R llama:llama /opt/llama.cpp
# Create the unit file (the raw ini lines were pasted inline before, which
# is not valid shell). Quoted 'EOF' prevents any shell expansion inside.
cat > llama.service <<'EOF'
[Unit]
Description=Run llama
# DefaultDependencies=no was dropped: a normal network daemon should keep
# the implicit sysinit/shutdown ordering.
After=network.target

[Service]
Type=simple
User=llama
Group=llama
WorkingDirectory=/opt/llama.cpp
ExecStart=/opt/llama.cpp/build/bin/llama-server -t 2 --ctx-size 768 --no-mmap --host 0.0.0.0 --port 8088 --offline -m /opt/llama.cpp/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf
TimeoutStartSec=60
# RemainAfterExit=yes was dropped: it only applies to Type=oneshot and would
# leave the unit "active" after the server died. Restart the daemon instead.
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF
# Install the unit. Administrator-created units belong in /etc/systemd/system;
# /lib/systemd/system is reserved for units shipped by distro packages.
cp -p llama.service /etc/systemd/system/
systemctl daemon-reload
# enable --now: enable at boot and start immediately (replaces enable + start).
systemctl enable --now llama.service