#4459 PyQt6 Python Synthesizer App with Gen-AI
#4459 PyQt6 Python Synthesizer App with Gen-AI
#4459 Provide a Python synthesizer app using PyQt6, including Ollama-supported local LLM for synth generative-AI
Here is a complete, working Python implementation of a modular synthesizer with an integrated Ollama-supported local LLM.
This application uses PyQt6 for a clean UI, PyAudio for low-latency real-time sound generation, and Ollama to translate natural language prompts (e.g., "give me a haunting, deep space drone") into synthesizer patch parameters using structured JSON.
Prerequisites
First, make sure you have the required Python libraries installed:
pip install PyQt6 pyaudio numpy ollama
Note: Ensure you have pulled a model with Ollama first (e.g., ollama pull llama3.2 or ollama pull qwen2.5:1.5b).
The Code (ai_synth.py)
import sys
import json
import numpy as np
import pyaudio
import ollama
from PyQt6.QtWidgets import (
QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
QSlider, QLabel, QComboBox, QPushButton, QTextEdit, QGroupBox
)
from PyQt6.QtCore import Qt, QThread, pyqtSignal
# --- Audio Engine Configurations ---
SAMPLE_RATE = 44100  # output sample rate in Hz
BUFFER_SIZE = 512  # frames requested per PyAudio callback


class AudioEngine:
    """Monophonic synth voice: one oscillator feeding a resonant low-pass filter.

    Audio is produced in a PyAudio stream callback. The public attributes
    ``wave_type``, ``frequency``, ``cutoff`` and ``resonance`` may be assigned
    at any time by the caller; each callback reads them once at the start of
    the buffer it renders.
    """

    def __init__(self):
        # Synth parameters are set before the stream exists so the callback
        # can never observe a half-initialised engine.
        self.wave_type = "sine"
        self.frequency = 440.0
        self.cutoff = 2000.0
        self.resonance = 1.0  # Q factor
        # Oscillator position in cycles, kept in [0, 1). Accumulating phase
        # (instead of deriving it from absolute time) keeps the waveform
        # continuous when the frequency changes between buffers.
        self.phase = 0.0
        # Biquad low-pass state: the two previous inputs and outputs.
        self.x1 = 0.0
        self.x2 = 0.0
        self.y1 = 0.0
        self.y2 = 0.0

        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(
            format=pyaudio.paFloat32,
            channels=1,
            rate=SAMPLE_RATE,
            output=True,  # PyAudio refuses to open a stream that is neither input nor output
            frames_per_buffer=BUFFER_SIZE,
            stream_callback=self._audio_callback,
            start=False,
        )

    def start(self):
        """Begin audio playback."""
        self.stream.start_stream()

    def stop(self):
        """Stop playback and release the stream and the PyAudio instance."""
        try:
            self.stream.stop_stream()
            self.stream.close()
        finally:
            # Release PortAudio even if stopping or closing the stream fails.
            self.p.terminate()

    def _audio_callback(self, in_data, frame_count, time_info, status):
        """PyAudio stream callback: return ``frame_count`` float32 samples as bytes."""
        block = self._render(frame_count)
        return (block.tobytes(), pyaudio.paContinue)

    def _render(self, frame_count):
        """Render one buffer: oscillator -> low-pass filter -> gain and limiter.

        Returns a float32 NumPy array of length ``frame_count`` whose values
        lie within [-0.5, 0.5].
        """
        # Snapshot the parameters once so a change made mid-buffer by another
        # thread cannot produce an inconsistent block.
        wave_type = self.wave_type
        frequency = self.frequency
        cutoff = self.cutoff
        resonance = self.resonance

        raw = self._oscillator(wave_type, frequency, frame_count)
        filtered = self._low_pass(raw, cutoff, resonance)
        # Hard limit to protect ears.
        return np.clip(filtered * 0.3, -0.5, 0.5).astype(np.float32)

    def _oscillator(self, wave_type, frequency, frame_count):
        """Generate ``frame_count`` raw samples in [-1, 1] and advance the phase."""
        step = frequency / SAMPLE_RATE  # cycles advanced per sample
        cycles = (self.phase + step * np.arange(frame_count)) % 1.0
        self.phase = (self.phase + step * frame_count) % 1.0

        if wave_type == "sine":
            return np.sin(2.0 * np.pi * cycles)
        if wave_type == "sawtooth":
            return 2.0 * cycles - 1.0
        if wave_type == "square":
            return np.sign(np.sin(2.0 * np.pi * cycles))
        # Any other value is treated as white noise.
        return np.random.uniform(-1.0, 1.0, frame_count)

    def _low_pass(self, samples, cutoff, resonance):
        """Apply a two-pole resonant low-pass (biquad), keeping state across calls."""
        # Keep the cutoff strictly below Nyquist so the coefficients stay valid.
        fc = min(max(cutoff, 1.0), 0.45 * SAMPLE_RATE)
        q = max(0.1, resonance)

        w0 = 2.0 * np.pi * fc / SAMPLE_RATE
        cos_w0 = float(np.cos(w0))
        alpha = float(np.sin(w0)) / (2.0 * q)

        # Low-pass biquad coefficients, normalised by a0.
        a0 = 1.0 + alpha
        b1 = (1.0 - cos_w0) / a0
        b0 = b1 / 2.0
        b2 = b0
        a1 = -2.0 * cos_w0 / a0
        a2 = (1.0 - alpha) / a0

        x1, x2, y1, y2 = self.x1, self.x2, self.y1, self.y2
        out = []
        # The recurrence is inherently sequential; plain Python floats are
        # cheaper here than indexing NumPy scalars one at a time.
        for x0 in samples.tolist():
            y0 = b0 * x0 + b1 * x1 + b2 * x2 - a1 * y1 - a2 * y2
            out.append(y0)
            x2, x1 = x1, x0
            y2, y1 = y1, y0
        self.x1, self.x2, self.y1, self.y2 = x1, x2, y1, y2
        return np.asarray(out, dtype=np.float64)
# --- Worker Thread for Local Ollama Inference ---
class AIWorker(QThread):
    """Background thread that asks a local Ollama model for a synth patch.

    Signals:
        patch_generated(dict): the parsed patch object returned by the model.
        log_status(str): human-readable progress and error messages.
    """

    patch_generated = pyqtSignal(dict)
    log_status = pyqtSignal(str)

    def __init__(self, prompt, model_name="llama3.2"):
        super().__init__()
        self.prompt = prompt
        self.model_name = model_name

    @staticmethod
    def _parse_patch(raw_text):
        """Extract and decode the JSON object contained in ``raw_text``.

        Only the span from the first ``{`` to the last ``}`` is parsed, so
        markdown fences or conversational text around the object are ignored.

        Raises:
            ValueError: if no ``{...}`` span is present or it is not valid
                JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        start = raw_text.find("{")
        end = raw_text.rfind("}")
        if start == -1 or end < start:
            raise ValueError("no JSON object found in model output")
        return json.loads(raw_text[start:end + 1])

    def run(self):
        """Query Ollama, then emit the parsed patch or an error status."""
        self.log_status.emit("Analyzing prompt via Ollama...")
        system_instructions = (
            "You are a patch designer for a subtractive sound synthesizer. "
            "Your job is to read a prompt description and output settings in raw JSON format only. "
            "JSON structure rules:\n"
            "{\n"
            ' "wave_type": "sine" or "sawtooth" or "square" or "noise",\n'
            ' "frequency": float between 60.0 and 1000.0,\n'
            ' "cutoff": float between 100.0 and 5000.0,\n'
            ' "resonance": float between 0.5 and 5.0\n'
            "}\n"
            "Do not include markdown or conversational text. Output raw JSON only."
        )
        # Broad catch on purpose: this is the thread's top-level boundary and
        # every failure (connection, missing model, bad JSON) is reported to
        # the UI through log_status instead of escaping the thread.
        try:
            response = ollama.generate(
                model=self.model_name,
                system=system_instructions,
                prompt=self.prompt,
                format="json",  # ask Ollama to constrain the reply to JSON
                options={"temperature": 0.5},
            )
            data = self._parse_patch(response['response'])
            self.patch_generated.emit(data)
            self.log_status.emit("Patch successfully updated by AI!")
        except Exception as e:
            self.log_status.emit(f"Error parsing AI generation: {str(e)}")
# --- Main Application GUI ---
class AISynthApp(QMainWindow):
    """Main window: manual synth controls on the left, AI patch generation on the right."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("AI Generative Sound Synthesizer")
        self.setMinimumSize(600, 450)
        self.worker = None  # most recent AIWorker, set by trigger_ai_generation
        self.audio = AudioEngine()
        self.init_ui()
        self.audio.start()

    def init_ui(self):
        """Build the two-column layout and install it as the central widget."""
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        main_layout = QHBoxLayout(main_widget)
        main_layout.addWidget(self._build_synth_panel(), stretch=1)
        main_layout.addWidget(self._build_ai_panel(), stretch=1)

    def _build_synth_panel(self):
        """Create the left panel holding the waveform selector and the three sliders."""
        synth_group = QGroupBox("Synthesizer Patch Controls")
        synth_layout = QVBoxLayout()

        # Wave selector (items are added before connecting so that populating
        # the combo box does not fire update_wave).
        synth_layout.addWidget(QLabel("Waveform Shape:"))
        self.wave_combo = QComboBox()
        self.wave_combo.addItems(["sine", "sawtooth", "square", "noise"])
        self.wave_combo.currentTextChanged.connect(self.update_wave)
        synth_layout.addWidget(self.wave_combo)

        # Frequency slider
        synth_layout.addWidget(QLabel("Base Frequency (Hz):"))
        self.freq_slider = QSlider(Qt.Orientation.Horizontal)
        self.freq_slider.setRange(60, 1000)
        self.freq_slider.setValue(440)
        self.freq_slider.valueChanged.connect(self.update_freq)
        synth_layout.addWidget(self.freq_slider)
        self.freq_lbl = QLabel("440 Hz")
        synth_layout.addWidget(self.freq_lbl)

        # Cutoff slider
        synth_layout.addWidget(QLabel("Filter Cutoff Frequency (Hz):"))
        self.cutoff_slider = QSlider(Qt.Orientation.Horizontal)
        self.cutoff_slider.setRange(100, 5000)
        self.cutoff_slider.setValue(2000)
        self.cutoff_slider.valueChanged.connect(self.update_cutoff)
        synth_layout.addWidget(self.cutoff_slider)
        self.cutoff_lbl = QLabel("2000 Hz")
        synth_layout.addWidget(self.cutoff_lbl)

        # Resonance slider: integer steps of 0.1, so 5..50 maps to Q 0.5..5.0
        synth_layout.addWidget(QLabel("Filter Resonance (Q):"))
        self.res_slider = QSlider(Qt.Orientation.Horizontal)
        self.res_slider.setRange(5, 50)
        self.res_slider.setValue(10)
        self.res_slider.valueChanged.connect(self.update_resonance)
        synth_layout.addWidget(self.res_slider)
        self.res_lbl = QLabel("1.0")
        synth_layout.addWidget(self.res_lbl)

        synth_group.setLayout(synth_layout)
        return synth_group

    def _build_ai_panel(self):
        """Create the right panel holding the model picker, prompt box, button and status."""
        ai_group = QGroupBox("Generative AI Sound Design")
        ai_layout = QVBoxLayout()

        ai_layout.addWidget(QLabel("Ollama Model Target:"))
        self.model_input = QComboBox()
        self.model_input.addItems(["llama3.2", "llama3.1", "qwen2.5:1.5b", "gemma2"])
        self.model_input.setEditable(True)  # allow typing any other model name
        ai_layout.addWidget(self.model_input)

        ai_layout.addWidget(QLabel("Describe the sound you want:"))
        self.prompt_input = QTextEdit()
        self.prompt_input.setPlaceholderText("e.g., A low dark ambient retro arcade machine buzz, spacey drone")
        ai_layout.addWidget(self.prompt_input)

        self.generate_btn = QPushButton("Generate AI Patch")
        self.generate_btn.clicked.connect(self.trigger_ai_generation)
        ai_layout.addWidget(self.generate_btn)

        self.status_log = QLabel("Status: Idle")
        self.status_log.setWordWrap(True)
        ai_layout.addWidget(self.status_log)

        ai_group.setLayout(ai_layout)
        return ai_group

    # ---- Synthesizer Control Bound Functions ----
    def update_wave(self, val):
        """Forward the selected waveform name to the audio engine."""
        self.audio.wave_type = val

    def update_freq(self, val):
        """Forward the frequency slider value (Hz) to the engine and refresh its label."""
        self.audio.frequency = float(val)
        self.freq_lbl.setText(f"{val} Hz")

    def update_cutoff(self, val):
        """Forward the cutoff slider value (Hz) to the engine and refresh its label."""
        self.audio.cutoff = float(val)
        self.cutoff_lbl.setText(f"{val} Hz")

    def update_resonance(self, val):
        """Convert the slider's tenths into a Q value, forward it and refresh its label."""
        q_val = val / 10.0
        self.audio.resonance = q_val
        self.res_lbl.setText(f"{q_val:.1f}")

    # ---- AI Integration Handling ----
    def trigger_ai_generation(self):
        """Start an AIWorker for the current prompt and model, if a prompt was entered."""
        prompt = self.prompt_input.toPlainText()
        model = self.model_input.currentText()
        if not prompt.strip():
            self.status_log.setText("Status: Please write a prompt description first!")
            return

        # The button stays disabled until the worker finishes, so only one
        # generation runs at a time.
        self.generate_btn.setEnabled(False)
        self.worker = AIWorker(prompt, model)
        self.worker.patch_generated.connect(self.apply_ai_patch)
        self.worker.log_status.connect(lambda msg: self.status_log.setText(f"Status: {msg}"))
        self.worker.finished.connect(lambda: self.generate_btn.setEnabled(True))
        self.worker.start()

    def apply_ai_patch(self, patch):
        """Apply a patch dict to the controls; the sliders then update the engine.

        Keys that are absent are left unchanged. Numeric values are rounded to
        the nearest slider step rather than truncated, so e.g. a resonance of
        1.19 becomes slider value 12 instead of 11.
        """
        # Broad catch on purpose: this runs as a Qt slot fed with model output,
        # and any bad value should end up in the status line, not escape the slot.
        try:
            if "wave_type" in patch:
                idx = self.wave_combo.findText(str(patch["wave_type"]).strip().lower())
                if idx >= 0:
                    self.wave_combo.setCurrentIndex(idx)
            if "frequency" in patch:
                self.freq_slider.setValue(round(float(patch["frequency"])))
            if "cutoff" in patch:
                self.cutoff_slider.setValue(round(float(patch["cutoff"])))
            if "resonance" in patch:
                self.res_slider.setValue(round(float(patch["resonance"]) * 10))
        except Exception as e:
            self.status_log.setText(f"Status: Error mapping patch attributes: {e}")

    def closeEvent(self, event):
        """Shut down the audio engine when the window is closed."""
        self.audio.stop()
        event.accept()
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the synth window,
    # and hand the event loop's return code back to the shell.
    qt_app = QApplication(sys.argv)
    synth_window = AISynthApp()
    synth_window.show()
    exit_code = qt_app.exec()
    sys.exit(exit_code)
Key Architectural Notes
Non-Blocking AI Calls: Because LLMs can take seconds to think, the inference logic is wrapped inside a dedicated thread (QThread). This prevents the application interface from locking up and freezing while Ollama works.
The System Prompt Strategy: To bypass the chatty nature of standard models, the system prompt uses a strict instruction wrapper requiring raw JSON objects that follow a fixed schema.
Audio Callback Loop: The AudioEngine class runs on PyAudio's background callback thread. On each callback it reads the current parameter values, which are set either by the UI controls or by a patch returned from the AI worker.
From <https://gemini.google.com/app/> Google Gemini (3.5 Flash)

Comments
Post a Comment