Try wget https://bash.commongrounds.cc/uploads/1751403551_generate_chat_model.sh from the console
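Once downloaded, make the script executable and run it from a directory that contains your ./text corpus (the file name comes from the upload URL above):

chmod +x 1751403551_generate_chat_model.sh
./1751403551_generate_chat_model.sh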
#!/bin/bash
# Script to generate a language model in ONNX format from text files
# Supports Debian, Fedora, and Arch-based distributions
# Generates ./resources/chat.onnx
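#
# Usage: place one or more .txt files in a ./text directory next to this
# script, then run it. It creates a Python venv in ./venv and writes the
# exported model to ./resources/chat.onnx.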
set -e
# Function to detect Linux distribution
detect_distro() {
    if [ -f /etc/debian_version ]; then
        DISTRO="debian"
    elif [ -f /etc/fedora-release ]; then
        DISTRO="fedora"
    elif [ -f /etc/arch-release ]; then
        DISTRO="arch"
    else
        echo "Error: Unsupported distribution. Only Debian, Fedora, and Arch are supported."
        exit 1
    fi
    echo "Detected distribution: $DISTRO"
}
# Function to install dependencies
install_dependencies() {
    echo "Installing dependencies for $DISTRO..."
    case $DISTRO in
        debian)
            sudo apt update
            sudo apt install -y python3 python3-pip python3-venv
            ;;
        fedora)
            sudo dnf install -y python3 python3-pip python3-virtualenv
            ;;
        arch)
            sudo pacman -Syu --noconfirm python python-pip python-virtualenv
            ;;
    esac

    # Create and activate a virtual environment
    python3 -m venv venv
    source venv/bin/activate

    # Install Python packages (quote the extras spec so it survives other shells)
    pip install --upgrade pip
    pip install torch transformers "optimum[onnxruntime]" onnx
}
# Function to create Python script for model generation
create_python_script() {
    cat > generate_model.py << 'EOF'
import os
import sys

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from optimum.onnxruntime import ORTModelForCausalLM

# Directory containing text files
TEXT_DIR = "./text"
OUTPUT_PATH = "./resources/chat.onnx"
MODEL_NAME = "distilgpt2"

# Create resources directory if it doesn't exist
os.makedirs("./resources", exist_ok=True)

# Read all text files
def load_text_files():
    texts = []
    for filename in os.listdir(TEXT_DIR):
        if filename.endswith(".txt"):
            with open(os.path.join(TEXT_DIR, filename), "r", encoding="utf-8") as f:
                texts.append(f.read())
    return " ".join(texts)

# Load text data (checked before downloading the model, so a missing
# corpus fails fast)
if not os.path.exists(TEXT_DIR):
    print(f"Error: Directory {TEXT_DIR} does not exist.")
    sys.exit(1)
text_data = load_text_files()
if not text_data.strip():
    print("Error: No valid text data found in ./text")
    sys.exit(1)

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Tokenize text (for demonstration; fine-tuning is simplified)
inputs = tokenizer(text_data, return_tensors="pt", truncation=True, max_length=512)

# Fine-tune model (simplified example, adjust for real use)
model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
for _ in range(1):  # Minimal training for demo
    outputs = model(**inputs, labels=inputs["input_ids"])
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

# Save the fine-tuned weights, then export that checkpoint to ONNX.
# Exporting MODEL_NAME directly would discard the fine-tuning above.
model.save_pretrained("./finetuned")
tokenizer.save_pretrained("./finetuned")
ort_model = ORTModelForCausalLM.from_pretrained("./finetuned", export=True)
ort_model.save_pretrained("./resources")

# Optimum chooses its own file name for the exported graph (e.g. model.onnx,
# depending on the optimum version); rename it so the output is chat.onnx.
for name in os.listdir("./resources"):
    if name.endswith(".onnx") and name != "chat.onnx":
        os.rename(os.path.join("./resources", name), OUTPUT_PATH)
        break
print(f"Model saved as ONNX to {OUTPUT_PATH}")
EOF
}
# Function to generate the ONNX model
generate_model() {
    echo "Generating chat.onnx from text files in ./text..."

    # Create resources directory
    mkdir -p ./resources

    # Check if text directory exists
    if [ ! -d "./text" ]; then
        echo "Error: ./text directory does not exist."
        exit 1
    fi

    # Run Python script
    source venv/bin/activate
    python3 generate_model.py

    # Verify output
    if [ -f "./resources/chat.onnx" ]; then
        echo "Successfully generated ./resources/chat.onnx"
    else
        echo "Error: Failed to generate ./resources/chat.onnx"
        exit 1
    fi
}
# Main execution
echo "Starting chat.onnx generation script..."
# Detect distribution
detect_distro
# Install dependencies
install_dependencies
# Create Python script
create_python_script
# Generate the model
generate_model
echo "Script completed successfully!"