Try wget https://bash.commongrounds.cc/uploads/1751403551_generate_chat_model.sh
from the console, then make it executable and run it:
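A minimal session (the filename is just what the upload URL ends in; the script also expects a ./text directory of UTF-8 .txt files in the working directory, which you create and fill yourself):

wget https://bash.commongrounds.cc/uploads/1751403551_generate_chat_model.sh
chmod +x 1751403551_generate_chat_model.sh
mkdir -p text        # put your training .txt files here
./1751403551_generate_chat_model.sh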
#!/bin/bash
# Script to generate a language model in ONNX format from text files
# Supports Debian, Fedora, and Arch-based distributions
# Generates ./resources/chat.onnx

set -e

# Function to detect Linux distribution
detect_distro() {
    if [ -f /etc/debian_version ]; then
        DISTRO="debian"
    elif [ -f /etc/fedora-release ]; then
        DISTRO="fedora"
    elif [ -f /etc/arch-release ]; then
        DISTRO="arch"
    else
        echo "Error: Unsupported distribution. Only Debian, Fedora, and Arch are supported."
        exit 1
    fi
    echo "Detected distribution: $DISTRO"
}

# Function to install dependencies
install_dependencies() {
    echo "Installing dependencies for $DISTRO..."
    case $DISTRO in
        debian)
            sudo apt update
            sudo apt install -y python3 python3-pip python3-venv
            ;;
        fedora)
            sudo dnf install -y python3 python3-pip python3-virtualenv
            ;;
        arch)
            sudo pacman -Syu --noconfirm python python-pip python-virtualenv
            ;;
    esac

    # Create and activate a virtual environment
    python3 -m venv venv
    source venv/bin/activate

    # Install Python packages (quote the extras spec so the shell
    # does not treat the brackets as a glob pattern)
    pip install --upgrade pip
    pip install torch transformers "optimum[onnxruntime]" onnx
}

# Function to create Python script for model generation
create_python_script() {
    cat > generate_model.py << 'EOF'
import os
import shutil

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from optimum.onnxruntime import ORTModelForCausalLM

# Directory containing text files
TEXT_DIR = "./text"
OUTPUT_PATH = "./resources/chat.onnx"
MODEL_NAME = "distilgpt2"

# Create resources directory if it doesn't exist
os.makedirs("./resources", exist_ok=True)

# Read all text files
def load_text_files():
    texts = []
    for filename in os.listdir(TEXT_DIR):
        if filename.endswith(".txt"):
            with open(os.path.join(TEXT_DIR, filename), "r", encoding="utf-8") as f:
                texts.append(f.read())
    return " ".join(texts)

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load text data
if not os.path.exists(TEXT_DIR):
    print(f"Error: Directory {TEXT_DIR} does not exist.")
    exit(1)

text_data = load_text_files()
if not text_data.strip():
    print("Error: No valid text data found in ./text")
    exit(1)

# Tokenize text (for demonstration; fine-tuning is simplified)
inputs = tokenizer(text_data, return_tensors="pt", truncation=True, max_length=512)

# Fine-tune model (simplified example, adjust for real use)
model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
for _ in range(1):  # Minimal training for demo
    outputs = model(**inputs, labels=inputs["input_ids"])
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

# Save the fine-tuned weights, then export those weights to ONNX.
# Exporting from MODEL_NAME directly would discard the training above.
model.save_pretrained("./tmp_model")
tokenizer.save_pretrained("./tmp_model")
ort_model = ORTModelForCausalLM.from_pretrained("./tmp_model", export=True)
ort_model.save_pretrained("./resources")

# Optimum picks the exported filename itself (model.onnx, or
# decoder_model.onnx on some versions), so rename the exported
# graph to the chat.onnx path this script promises.
for candidate in ("model.onnx", "decoder_model.onnx"):
    exported = os.path.join("./resources", candidate)
    if os.path.exists(exported):
        shutil.move(exported, OUTPUT_PATH)
        break

print(f"Model saved as ONNX to {OUTPUT_PATH}")
EOF
}

# Function to generate the ONNX model
generate_model() {
    echo "Generating chat.onnx from text files in ./text..."

    # Create resources directory
    mkdir -p ./resources

    # Check if text directory exists
    if [ ! -d "./text" ]; then
        echo "Error: ./text directory does not exist."
        exit 1
    fi

    # Run Python script
    source venv/bin/activate
    python3 generate_model.py

    # Verify output
    if [ -f "./resources/chat.onnx" ]; then
        echo "Successfully generated ./resources/chat.onnx"
    else
        echo "Error: Failed to generate ./resources/chat.onnx"
        exit 1
    fi
}

# Main execution
echo "Starting chat.onnx generation script..."
# Detect distribution
detect_distro

# Install dependencies
install_dependencies

# Create Python script
create_python_script

# Generate the model
generate_model

echo "Script completed successfully!"
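If the run succeeds, a quick sanity check is to open the exported graph with onnxruntime (already installed in the script's venv via optimum[onnxruntime]) and list its input signature. A minimal sketch, assuming the export produced a single self-contained chat.onnx:

source venv/bin/activate
python3 - << 'EOF'
import onnxruntime as ort

# Load the exported graph and print the name, shape, and type of each input.
sess = ort.InferenceSession("./resources/chat.onnx")
for inp in sess.get_inputs():
    print(inp.name, inp.shape, inp.type)
EOF

For a GPT-2-family export you should see inputs such as input_ids and attention_mask; if the session fails to load, the export step is the first place to look.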