# Become root; every command below assumes a root shell.
sudo su -
apt update
# build-essential/cmake: toolchain; libopenblas-dev: CPU BLAS backend;
# glslc: GLSL->SPIR-V compiler (only needed if the Vulkan backend is built).
apt install -y git git-lfs build-essential cmake python3-pip libopenblas-dev glslc
cd /opt
git clone https://github.com/ggerganov/llama.cpp
# Dedicated unprivileged account that owns the tree and runs the server.
groupadd -g 8088 llama
# No -m: /opt/llama.cpp already exists from the git clone above.
useradd -u 8088 -g llama -G users -c "llama" -d /opt/llama.cpp llama
chown -R llama:llama /opt/llama.cpp
#optional exit "root" superuser
#exit
cd llama.cpp
# -p keeps a re-run from failing (cmake -B would create the dir anyway).
mkdir -p build
# OpenBLAS-accelerated CPU build.
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
cmake --build build --config Release -j"$(nproc)"
# After the build is complete, the binaries are located in build/bin/.
# Smoke-test the build (the bare "Test via:" line was not a valid command):
/opt/llama.cpp/build/bin/llama-cli --version
/opt/llama.cpp/build/bin/llama-cli --help
#download the models:
# Use the absolute path: the cwd at this point is ambiguous, and the old
# 'cd ../models' resolved to /opt/models from /opt/llama.cpp, while the
# systemd ExecStart expects the model under /opt/llama.cpp/models/.
cd /opt/llama.cpp/models
mkdir -p Phi-3-mini-4k-instruct-gguf
cd Phi-3-mini-4k-instruct-gguf
wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf
# Re-own the whole tree so the 'llama' service user can read the model.
chown -R llama:llama /opt/llama.cpp
# Create the unit file (the raw ini lines were pasted inline before, which
# is not valid shell). Quoted 'EOF' prevents any shell expansion inside.
cat > llama.service <<'EOF'
[Unit]
Description=Run llama
# DefaultDependencies=no was dropped: a normal network daemon should keep
# the implicit sysinit/shutdown ordering.
After=network.target

[Service]
Type=simple
User=llama
Group=llama
WorkingDirectory=/opt/llama.cpp
ExecStart=/opt/llama.cpp/build/bin/llama-server -t 2 --ctx-size 768 --no-mmap --host 0.0.0.0 --port 8088 --offline -m /opt/llama.cpp/models/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf
TimeoutStartSec=60
# RemainAfterExit=yes was dropped: it only applies to Type=oneshot and would
# leave the unit "active" after the server died. Restart the daemon instead.
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF
# Install the unit. Administrator-created units belong in /etc/systemd/system;
# /lib/systemd/system is reserved for units shipped by distro packages.
cp -p llama.service /etc/systemd/system/
systemctl daemon-reload
# enable --now: enable at boot and start immediately (replaces enable + start).
systemctl enable --now llama.service