BASH Post Services

Viewing: 1751403551_generate_chat_model.sh

To download this script, run wget https://bash.commongrounds.cc/uploads/1751403551_generate_chat_model.sh from the console.
#!/bin/bash

# Script to generate a language model in ONNX format from text files
# Supports Debian, Fedora, and Arch-based distributions
# Generates ./resources/chat.onnx

set -e

# Identify the host distribution from its marker file under /etc.
# Globals:  DISTRO (written) - set to "debian", "fedora", or "arch"
# Outputs:  the detected name, or an error message, on stdout
# Exits:    with status 1 when none of the marker files is present
detect_distro() {
    local entry matched=""

    # Entries are "name:marker-file"; order gives the same precedence as an
    # if/elif chain (debian, then fedora, then arch).
    for entry in \
        "debian:/etc/debian_version" \
        "fedora:/etc/fedora-release" \
        "arch:/etc/arch-release"; do
        if [ -f "${entry#*:}" ]; then
            matched="${entry%%:*}"
            break
        fi
    done

    if [ -z "$matched" ]; then
        echo "Error: Unsupported distribution. Only Debian, Fedora, and Arch are supported."
        exit 1
    fi

    DISTRO="$matched"
    echo "Detected distribution: $DISTRO"
}

# Install the system Python toolchain for $DISTRO, then create ./venv and
# install the Python packages the generator needs into it.
# Globals:  DISTRO (read) - "debian", "fedora", or "arch"
# Outputs:  progress on stdout; errors on stderr
# Returns:  1 when DISTRO is not one of the supported values
# Note:     the virtual environment stays activated in the calling shell.
install_dependencies() {
    echo "Installing dependencies for ${DISTRO:-}..."

    case "${DISTRO:-}" in
        debian)
            # apt-get, not apt: apt's command line is not a stable
            # interface for scripts.
            sudo apt-get update
            sudo apt-get install -y python3 python3-pip python3-venv
            ;;
        fedora)
            sudo dnf install -y python3 python3-pip python3-virtualenv
            ;;
        arch)
            sudo pacman -Syu --noconfirm python python-pip python-virtualenv
            ;;
        *)
            # Without a known package manager nothing below can be trusted
            # to work, so stop instead of silently skipping the install.
            echo "Error: Unsupported distribution '${DISTRO:-}'; cannot install dependencies." >&2
            return 1
            ;;
    esac

    # Create and activate a virtual environment
    python3 -m venv venv
    source venv/bin/activate

    # Install Python packages
    pip install --upgrade pip
    # The extras spec is quoted so the shell cannot treat [onnxruntime] as a
    # glob character class and substitute a matching file name.
    pip install torch transformers 'optimum[onnxruntime]' onnx
}

# Write generate_model.py into the current directory.
# The heredoc delimiter is quoted, so the Python text is written verbatim
# with no shell expansion.
# Outputs:  creates or overwrites ./generate_model.py
create_python_script() {
    cat > generate_model.py << 'EOF'
"""Fine-tune distilgpt2 on ./text/*.txt and export it to ./resources/chat.onnx."""
import glob
import os
import shutil
import sys
import tempfile

import torch
from optimum.onnxruntime import ORTModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer

# Directory containing text files
TEXT_DIR = "./text"
OUTPUT_DIR = "./resources"
OUTPUT_PATH = "./resources/chat.onnx"
MODEL_NAME = "distilgpt2"


def fail(message):
    """Report an error on stderr and stop with a non-zero status."""
    print(f"Error: {message}", file=sys.stderr)
    sys.exit(1)


def load_text_files():
    """Return the contents of every .txt file in TEXT_DIR joined by spaces."""
    texts = []
    # Sorted so the corpus does not depend on directory listing order.
    for filename in sorted(os.listdir(TEXT_DIR)):
        if filename.endswith(".txt"):
            with open(os.path.join(TEXT_DIR, filename), "r", encoding="utf-8") as f:
                texts.append(f.read())
    return " ".join(texts)


def find_exported_graph():
    """Return the path of an ONNX file the exporter left in OUTPUT_DIR, or None.

    The exporter chooses its own file name (commonly model.onnx, but this
    varies between optimum versions - confirm for the installed one), so the
    directory is searched instead of assuming a name.
    """
    target = os.path.abspath(OUTPUT_PATH)
    candidates = [
        path
        for path in sorted(glob.glob(os.path.join(OUTPUT_DIR, "*.onnx")))
        if os.path.abspath(path) != target
    ]
    for path in candidates:
        if os.path.basename(path) == "model.onnx":
            return path
    return candidates[0] if candidates else None


# Create resources directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Validate the input before the slow model download.
if not os.path.isdir(TEXT_DIR):
    fail(f"Directory {TEXT_DIR} does not exist.")

text_data = load_text_files()
if not text_data.strip():
    fail("No valid text data found in ./text")

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Tokenize text (for demonstration; only the first 512 tokens are used)
inputs = tokenizer(text_data, return_tensors="pt", truncation=True, max_length=512)

# Fine-tune model (simplified example, adjust for real use)
model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
for _ in range(1):  # Minimal training for demo
    outputs = model(**inputs, labels=inputs["input_ids"])
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
model.eval()

# Export the fine-tuned weights. Exporting MODEL_NAME here would convert the
# untouched hub checkpoint and throw the training step away, so the tuned
# model is written to a scratch directory and exported from there.
with tempfile.TemporaryDirectory() as checkpoint_dir:
    model.save_pretrained(checkpoint_dir)
    tokenizer.save_pretrained(checkpoint_dir)
    ort_model = ORTModelForCausalLM.from_pretrained(checkpoint_dir, export=True)
    ort_model.save_pretrained(OUTPUT_DIR)

# Publish the graph under the file name the calling shell script checks for.
exported = find_exported_graph()
if exported is None:
    fail(f"ONNX export produced no .onnx file in {OUTPUT_DIR}")
shutil.copyfile(exported, OUTPUT_PATH)
print(f"Model saved as ONNX to {OUTPUT_PATH}")
EOF
}

# Run generate_model.py inside ./venv and confirm it produced the ONNX file.
# Outputs:  progress on stdout; errors on stderr
# Exits:    with status 1 when ./text is missing, the Python run fails, or
#           ./resources/chat.onnx does not exist afterwards
generate_model() {
    echo "Generating chat.onnx from text files in ./text..."

    # Create resources directory
    mkdir -p ./resources

    # Check if text directory exists
    if [ ! -d "./text" ]; then
        echo "Error: ./text directory does not exist." >&2
        exit 1
    fi

    # Run Python script; the status is checked here so the function does not
    # depend on the caller having `set -e` enabled.
    source venv/bin/activate
    if ! python3 generate_model.py; then
        echo "Error: generate_model.py failed." >&2
        exit 1
    fi

    # Verify output
    if [ ! -f "./resources/chat.onnx" ]; then
        echo "Error: Failed to generate ./resources/chat.onnx" >&2
        exit 1
    fi
    echo "Successfully generated ./resources/chat.onnx"
}

# Main execution
# Runs the pipeline stages in order: detect the distribution, install
# dependencies, write the Python generator, then build the model. The script
# runs under `set -e`, so a failing stage stops the run before the next one.
main() {
    local stage

    echo "Starting chat.onnx generation script..."

    for stage in \
        detect_distro \
        install_dependencies \
        create_python_script \
        generate_model; do
        "$stage"
    done

    echo "Script completed successfully!"
}

main "$@"
BASH to Home