#4459 PyQt6 Python Synthesizer App with Gen-AI

#4459 PyQt6 Python Synthesizer App with Gen-AI

#4459 Provide a Python synthesizer app using PyQt6, including Ollama-supported local LLM for synth generative-AI

Here is a complete, working Python implementation of a modular synthesizer with an integrated Ollama-supported local LLM.

This application uses PyQt6 for a clean UI, PyAudio for low-latency real-time sound generation, and Ollama to translate natural language prompts (e.g., "give me a haunting, deep space drone") into synthesizer patch parameters using structured JSON.

Prerequisites

First, make sure you have the required Python libraries installed:

pip install PyQt6 pyaudio numpy ollama

Note: Ensure you have Ollama running locally with a lightweight model pulled (e.g., ollama pull llama3.2 or ollama pull qwen2.5:1.5b).

The Code (ai_synth.py)

import sys
import json
import numpy as np
import pyaudio
import ollama
from PyQt6.QtWidgets import (
    QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, 
    QSlider, QLabel, QComboBox, QPushButton, QTextEdit, QGroupBox
)
from PyQt6.QtCore import Qt, QThread, pyqtSignal

# --- Audio Engine Configurations ---
SAMPLE_RATE = 44100
BUFFER_SIZE = 512


class AudioEngine:
    """Real-time mono synth voice: one oscillator into a resonant low-pass filter.

    The public parameters (``wave_type``, ``frequency``, ``cutoff``,
    ``resonance``) are plain attributes. They are written by the GUI and read
    once per audio buffer by the PyAudio callback.
    """

    def __init__(self):
        # Set up every attribute the callback reads *before* the stream exists,
        # so the callback can never observe a half-initialised engine.
        self._init_state()

        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(
            format=pyaudio.paFloat32,
            channels=1,
            rate=SAMPLE_RATE,
            output=True,  # PyAudio requires input=True and/or output=True
            frames_per_buffer=BUFFER_SIZE,
            stream_callback=self._audio_callback,
            start=False,
        )

    def _init_state(self):
        """Reset synth parameters and DSP state to their defaults."""
        # Synth parameters (modified from the GUI)
        self.wave_type = "sine"
        self.frequency = 440.0
        self.cutoff = 2000.0
        self.resonance = 1.0  # Q factor

        # Oscillator phase in cycles, kept in [0, 1). Accumulating phase
        # (instead of absolute time) keeps the waveform continuous when the
        # frequency changes between buffers.
        self.phase = 0.0

        # Biquad low-pass history: previous two inputs and outputs.
        self.x1 = 0.0
        self.x2 = 0.0
        self.y1 = 0.0
        self.y2 = 0.0

    def start(self):
        """Start audio playback."""
        self.stream.start_stream()

    def stop(self):
        """Stop playback and release the stream and the PortAudio session."""
        self.stream.stop_stream()
        self.stream.close()
        self.p.terminate()

    def _render(self, frame_count):
        """Generate ``frame_count`` filtered, level-limited float32 samples.

        Oscillator phase and filter history are carried over on ``self`` so
        consecutive calls produce one continuous signal.
        """
        # Snapshot the parameters once so a GUI update in the middle of a
        # buffer cannot produce an inconsistent set of values.
        wave_type = self.wave_type
        frequency = float(self.frequency)
        cutoff = float(self.cutoff)
        q = max(0.1, float(self.resonance))

        # --- Oscillator ---
        step = frequency / SAMPLE_RATE  # cycles advanced per sample
        cycles = (self.phase + step * np.arange(frame_count)) % 1.0
        self.phase = float((self.phase + step * frame_count) % 1.0)

        if wave_type == "sine":
            samples = np.sin(2 * np.pi * cycles)
        elif wave_type == "sawtooth":
            samples = 2.0 * cycles - 1.0
        elif wave_type == "square":
            samples = np.sign(np.sin(2 * np.pi * cycles))
        else:
            samples = np.random.uniform(-1.0, 1.0, frame_count)  # Noise

        # --- 2-pole low-pass (RBJ cookbook biquad), normalised by a0 ---
        # Keep the cutoff strictly inside (0, Nyquist) so the filter is stable.
        cutoff = min(max(cutoff, 1.0), 0.45 * SAMPLE_RATE)
        w0 = 2 * np.pi * cutoff / SAMPLE_RATE
        cos_w0 = float(np.cos(w0))
        alpha = float(np.sin(w0)) / (2.0 * q)
        a0 = 1.0 + alpha
        b0 = (1.0 - cos_w0) / 2.0 / a0
        b1 = (1.0 - cos_w0) / a0
        b2 = b0
        a1 = -2.0 * cos_w0 / a0
        a2 = (1.0 - alpha) / a0

        x1, x2, y1, y2 = self.x1, self.x2, self.y1, self.y2
        if not np.isfinite(x1 + x2 + y1 + y2):
            # A non-finite history would silence the output permanently.
            x1 = x2 = y1 = y2 = 0.0

        # The recursion is inherently sequential; iterating plain Python
        # floats (via tolist) is cheaper than indexing the NumPy array.
        filtered = []
        for x0 in samples.tolist():
            y0 = b0 * x0 + b1 * x1 + b2 * x2 - a1 * y1 - a2 * y2
            x2, x1 = x1, x0
            y2, y1 = y1, y0
            filtered.append(y0)
        self.x1, self.x2, self.y1, self.y2 = x1, x2, y1, y2

        # Hard limit to protect ears
        out = np.clip(np.asarray(filtered, dtype=np.float64) * 0.3, -0.5, 0.5)
        return out.astype(np.float32)

    def _audio_callback(self, in_data, frame_count, time_info, status):
        """PyAudio stream callback: return the next buffer of audio bytes."""
        samples = self._render(frame_count)
        return (samples.tobytes(), pyaudio.paContinue)


# --- Worker Thread for Local Ollama Inference ---
class AIWorker(QThread):
    """Background thread that asks a local Ollama model for a synth patch.

    Signals:
        patch_generated (dict): the parsed patch object from the model.
        log_status (str): human-readable progress and error messages.
    """

    patch_generated = pyqtSignal(dict)
    log_status = pyqtSignal(str)

    def __init__(self, prompt, model_name="llama3.2"):
        super().__init__()
        self.prompt = prompt
        self.model_name = model_name

    @staticmethod
    def _parse_patch(raw_text):
        """Extract the JSON object from the model's reply and return it as a dict.

        Tolerates markdown code fences and stray text before or after the
        object.

        Raises:
            ValueError: if no JSON object is present or it cannot be decoded
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        text = raw_text.strip().replace("```json", "").replace("```", "")
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end < start:
            raise ValueError("no JSON object found in model output")
        return json.loads(text[start:end + 1])

    def run(self):
        """Query Ollama and emit either a patch or an error status."""
        self.log_status.emit("Analyzing prompt via Ollama...")

        system_instructions = (
            "You are a patch designer for a subtractive sound synthesizer. "
            "Your job is to read a prompt description and output settings in raw JSON format only. "
            "JSON structure rules:\n"
            "{\n"
            '  "wave_type": "sine" or "sawtooth" or "square" or "noise",\n'
            '  "frequency": float between 60.0 and 1000.0,\n'
            '  "cutoff": float between 100.0 and 5000.0,\n'
            '  "resonance": float between 0.5 and 5.0\n'
            "}\n"
            "Do not include markdown or conversational text. Output raw JSON only."
        )

        # This is the top of the worker thread, so every failure (server
        # unreachable, unknown model, malformed reply) is reported through
        # the status signal instead of escaping from run().
        try:
            response = ollama.generate(
                model=self.model_name,
                system=system_instructions,
                prompt=self.prompt,
                format="json",  # ask Ollama to constrain the reply to JSON
                options={"temperature": 0.5},
            )
            data = self._parse_patch(response["response"])
        except Exception as e:
            self.log_status.emit(f"Error parsing AI generation: {e}")
            return

        self.patch_generated.emit(data)
        self.log_status.emit("Patch successfully updated by AI!")


# --- Main Application GUI ---
class AISynthApp(QMainWindow):
    """Main window: manual synth controls on the left, AI patch panel on the right."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("AI Generative Sound Synthesizer")
        self.setMinimumSize(600, 450)

        # Current AIWorker, if any. Kept on self so the thread object is not
        # garbage-collected while it is running.
        self.worker = None

        self.audio = AudioEngine()
        self.init_ui()
        self.audio.start()

    def init_ui(self):
        """Build the two-panel layout and wire the widgets to their handlers."""
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        main_layout = QHBoxLayout(main_widget)

        main_layout.addWidget(self._build_synth_panel(), stretch=1)
        main_layout.addWidget(self._build_ai_panel(), stretch=1)

    @staticmethod
    def _add_slider(layout, title, minimum, maximum, value, readout):
        """Add a titled horizontal slider and its value label; return both."""
        layout.addWidget(QLabel(title))
        slider = QSlider(Qt.Orientation.Horizontal)
        slider.setRange(minimum, maximum)
        slider.setValue(value)
        layout.addWidget(slider)
        label = QLabel(readout)
        layout.addWidget(label)
        return slider, label

    def _build_synth_panel(self):
        """Create the left panel with the manual synth controls."""
        synth_group = QGroupBox("Synthesizer Patch Controls")
        synth_layout = QVBoxLayout()

        # Wave selector (items are added before connecting the handler).
        synth_layout.addWidget(QLabel("Waveform Shape:"))
        self.wave_combo = QComboBox()
        self.wave_combo.addItems(["sine", "sawtooth", "square", "noise"])
        self.wave_combo.currentTextChanged.connect(self.update_wave)
        synth_layout.addWidget(self.wave_combo)

        # Each handler is connected only after its label exists, because the
        # handlers update the label text.
        self.freq_slider, self.freq_lbl = self._add_slider(
            synth_layout, "Base Frequency (Hz):", 60, 1000, 440, "440 Hz"
        )
        self.freq_slider.valueChanged.connect(self.update_freq)

        self.cutoff_slider, self.cutoff_lbl = self._add_slider(
            synth_layout, "Filter Cutoff Frequency (Hz):", 100, 5000, 2000, "2000 Hz"
        )
        self.cutoff_slider.valueChanged.connect(self.update_cutoff)

        # Slider units are tenths of Q: 5..50 maps to 0.5..5.0
        self.res_slider, self.res_lbl = self._add_slider(
            synth_layout, "Filter Resonance (Q):", 5, 50, 10, "1.0"
        )
        self.res_slider.valueChanged.connect(self.update_resonance)

        synth_group.setLayout(synth_layout)
        return synth_group

    def _build_ai_panel(self):
        """Create the right panel with the model picker, prompt box and status."""
        ai_group = QGroupBox("Generative AI Sound Design")
        ai_layout = QVBoxLayout()

        ai_layout.addWidget(QLabel("Ollama Model Target:"))
        self.model_input = QComboBox()
        self.model_input.addItems(["llama3.2", "llama3.1", "qwen2.5:1.5b", "gemma2"])
        self.model_input.setEditable(True)
        ai_layout.addWidget(self.model_input)

        ai_layout.addWidget(QLabel("Describe the sound you want:"))
        self.prompt_input = QTextEdit()
        self.prompt_input.setPlaceholderText("e.g., A low dark ambient retro arcade machine buzz, spacey drone")
        ai_layout.addWidget(self.prompt_input)

        self.generate_btn = QPushButton("Generate AI Patch")
        self.generate_btn.clicked.connect(self.trigger_ai_generation)
        ai_layout.addWidget(self.generate_btn)

        self.status_log = QLabel("Status: Idle")
        self.status_log.setWordWrap(True)
        ai_layout.addWidget(self.status_log)

        ai_group.setLayout(ai_layout)
        return ai_group

    # ---- Synthesizer Control Bound Functions ----
    def update_wave(self, val):
        """Set the oscillator waveform from the combo box text."""
        self.audio.wave_type = val

    def update_freq(self, val):
        """Set the oscillator frequency (Hz) and refresh its label."""
        self.audio.frequency = float(val)
        self.freq_lbl.setText(f"{val} Hz")

    def update_cutoff(self, val):
        """Set the filter cutoff (Hz) and refresh its label."""
        self.audio.cutoff = float(val)
        self.cutoff_lbl.setText(f"{val} Hz")

    def update_resonance(self, val):
        """Convert the slider value (tenths) to Q and refresh its label."""
        q_val = val / 10.0
        self.audio.resonance = q_val
        self.res_lbl.setText(f"{q_val:.1f}")

    # ---- AI Integration Handling ----
    def trigger_ai_generation(self):
        """Start a background AIWorker for the current prompt and model."""
        prompt = self.prompt_input.toPlainText()
        model = self.model_input.currentText()
        if not prompt.strip():
            self.status_log.setText("Status: Please write a prompt description first!")
            return

        # The button stays disabled until the worker finishes, so only one
        # request is in flight at a time.
        self.generate_btn.setEnabled(False)
        self.worker = AIWorker(prompt, model)
        self.worker.patch_generated.connect(self.apply_ai_patch)
        self.worker.log_status.connect(self._show_status)
        self.worker.finished.connect(self._on_worker_finished)
        self.worker.start()

    def _show_status(self, msg):
        """Display a worker status message in the status label."""
        self.status_log.setText(f"Status: {msg}")

    def _on_worker_finished(self):
        """Re-enable the generate button once the worker thread has ended."""
        self.generate_btn.setEnabled(True)

    def apply_ai_patch(self, patch):
        """Apply an AI-generated patch dict by driving the existing widgets.

        Values go through the sliders and combo box, so they are clamped to
        the widget ranges and reach the audio engine via the normal handlers.
        Keys missing from ``patch`` are left unchanged.
        """
        try:
            if "wave_type" in patch:
                idx = self.wave_combo.findText(str(patch["wave_type"]).lower())
                if idx >= 0:
                    self.wave_combo.setCurrentIndex(idx)

            # round() rather than int(): truncation turns e.g. a resonance of
            # 0.57 into slider step 5 instead of the nearest step, 6.
            if "frequency" in patch:
                self.freq_slider.setValue(round(float(patch["frequency"])))

            if "cutoff" in patch:
                self.cutoff_slider.setValue(round(float(patch["cutoff"])))

            if "resonance" in patch:
                self.res_slider.setValue(round(float(patch["resonance"]) * 10))
        except (TypeError, ValueError, OverflowError) as e:
            # Non-numeric, NaN or infinite values supplied by the model.
            self.status_log.setText(f"Status: Error mapping patch attributes: {e}")

    def closeEvent(self, event):
        """Stop audio and give any in-flight AI request a moment to finish."""
        self.audio.stop()
        if self.worker is not None and self.worker.isRunning():
            # Best effort: destroying a QThread that is still running is an
            # error in Qt, so wait briefly (bounded, to avoid hanging on close).
            self.worker.wait(3000)
        event.accept()

if __name__ == "__main__":
    # Script entry point: create the Qt application, show the synth window,
    # and hand the event loop's return code back to the shell.
    qt_app = QApplication(sys.argv)
    main_window = AISynthApp()
    main_window.show()
    exit_code = qt_app.exec()
    sys.exit(exit_code)

Key Architectural Notes

  • Non-Blocking AI Calls: Because LLMs can take seconds to think, the inference logic is wrapped inside a dedicated thread (QThread). This prevents the application interface from locking up and freezing while Ollama works.

  • The System Prompt Strategy: To counter the chatty nature of standard models, the system prompt strictly instructs the model to reply with a single raw JSON object and nothing else.

  • Audio Callback Loop: The AudioEngine callback runs on PyAudio's background thread. On every buffer it reads the current parameter values, which are set directly by the UI controls or by a patch returned from the AI worker.


From <https://gemini.google.com/app/>  Google Gemini (3.5 Flash)

Comments

Popular posts from this blog

#2892 Example of ChatGPT o1 Prompt

#3679 Google Gemini 2.5 Pro: Create Python Application to View and Query Neo4j KG