#3824 Librosa MP3 Art Generator
#3824 Librosa MP3 Art Generator
Here is a Python application that uses librosa to extract data from MP3 files and Pillow (PIL) to generate "art" in five different styles.
This script will be a command-line tool. You provide it with a directory of your MP3s and the style you want to create, and it will save the resulting image.
Prerequisites
You must have several Python libraries installed. You can install them all with this command:
pip install librosa numpy pillow matplotlib
librosa: For loading audio and extracting features (spectrograms, chromagrams, etc.).
numpy: For all the numerical data manipulation.
pillow (PIL): For creating and saving the final image.
matplotlib: We use this to get access to its scientific color maps (like 'viridis' or 'magma') to colorize the data.
🎨 The Python Art Generator
Save the following code as audio_art.py:
import librosa
import numpy as np
from PIL import Image, ImageOps
import os
import argparse
import glob
import random
import matplotlib.pyplot as plt
# --- Configuration ---
# Maximum number of MP3 files picked up for a single run
NUM_FILES = 8
# Common (width, height) every spectrogram tile is resized to, so grid cells line up
SPEC_SIZE = (400, 300)
# Images are only written to disk, never shown, so select the non-interactive 'agg' backend
plt.switch_backend('agg')
# --- Helper Functions ---
def find_mp3s(input_dir, num_files=NUM_FILES):
    """Pick up to ``num_files`` MP3 files at random from a directory tree.

    The directory is searched recursively. Progress, warnings and the
    "nothing found" error are reported on stdout.

    Args:
        input_dir: Root directory to scan.
        num_files: Maximum number of files to return.

    Returns:
        A list of file paths in random order. The list is empty when no MP3
        file is found, and shorter than ``num_files`` when fewer exist.
    """
    print(f"Scanning {input_dir} for MP3 files...")
    # Escape the directory so glob metacharacters in its name (e.g. a folder
    # called "Music [2020]") are matched literally instead of being parsed as
    # a pattern, which would silently match nothing.
    search_path = os.path.join(glob.escape(input_dir), "**", "*")
    # Compare the extension case-insensitively: a literal "*.mp3" pattern
    # skips "SONG.MP3" on case-sensitive filesystems. Directories that happen
    # to end in ".mp3" are excluded because they cannot be loaded as audio.
    files = [
        path
        for path in glob.glob(search_path, recursive=True)
        if path.lower().endswith(".mp3") and os.path.isfile(path)
    ]
    if not files:
        print("Error: No MP3 files found.")
        return []
    # Shuffle and pick the specified number of files
    random.shuffle(files)
    selected_files = files[:num_files]
    if len(selected_files) < num_files:
        print(f"Warning: Found only {len(selected_files)} MP3s. Using all found.")
    print(f"Found {len(selected_files)} files to process.")
    return selected_files
def array_to_image(arr, colormap_name='viridis', size=None):
    """Convert a 2D numpy array into a colorized RGB PIL Image.

    The array is min-max normalized to the 0-1 range, mapped through a
    matplotlib colormap and converted to 8-bit RGB. Array rows become image
    rows and array columns become image columns.

    Args:
        arr: 2D numeric numpy array.
        colormap_name: Name of a matplotlib colormap, or the sentinel
            ``'blocky'``. The sentinel colors the data with ``'viridis'`` and
            switches the optional resize to nearest-neighbour so cells keep
            hard edges.
        size: Optional ``(width, height)`` to resize the image to. When
            omitted the image keeps the array's own dimensions.

    Returns:
        A PIL ``Image`` in RGB mode.
    """
    # 'blocky' is a resampling hint, not a registered matplotlib colormap:
    # handing it to plt.get_cmap() raises ValueError, so translate it to a
    # real colormap before the lookup.
    blocky = colormap_name == 'blocky'
    cmap = plt.get_cmap('viridis' if blocky else colormap_name)

    # Normalize array to 0-1 for the colormap; a constant array maps to 0.
    lowest, highest = arr.min(), arr.max()
    if highest > lowest:
        norm_arr = (arr - lowest) / (highest - lowest)
    else:
        norm_arr = np.zeros(arr.shape)

    img_data = cmap(norm_arr)  # (H, W, 4) RGBA floats in 0-1
    # Drop the alpha channel and convert to 0-255 uint8
    img_data_uint8 = (img_data[:, :, :3] * 255).astype(np.uint8)
    img = Image.fromarray(img_data_uint8)

    if size:
        # NEAREST gives a blocky resize, LANCZOS a smooth one
        if blocky:
            resample_filter = Image.Resampling.NEAREST
        else:
            resample_filter = Image.Resampling.LANCZOS
        img = img.resize(size, resample_filter)
    return img
def get_feature(file_path, feature_type='melspec'):
    """Load an audio file and compute one librosa feature from it.

    At most the first 60 seconds of the file are loaded. A loading failure is
    reported on stdout and turned into ``None``; errors raised while
    computing the feature itself are not caught.

    Args:
        file_path: Path of the audio file to load.
        feature_type: ``'melspec'`` for a 128-band Mel spectrogram in dB
            relative to its peak, ``'chroma'`` for an STFT chromagram, or
            ``'hpss'`` for a ``(harmonic, percussive)`` pair of dB Mel
            spectrograms.

    Returns:
        The requested feature array (a 2-tuple of arrays for ``'hpss'``), or
        ``None`` when the file cannot be loaded or ``feature_type`` is not
        one of the names above.
    """
    try:
        samples, rate = librosa.load(file_path, duration=60)
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None

    def log_mel(signal, **mel_options):
        # Mel power spectrogram converted to dB, referenced to its own maximum
        power = librosa.feature.melspectrogram(y=signal, sr=rate, **mel_options)
        return librosa.power_to_db(power, ref=np.max)

    if feature_type == 'hpss':
        # Harmonic-percussive source separation, one spectrogram per component
        harmonic, percussive = librosa.effects.hpss(samples)
        return log_mel(harmonic), log_mel(percussive)
    if feature_type == 'chroma':
        return librosa.feature.chroma_stft(y=samples, sr=rate)
    if feature_type == 'melspec':
        return log_mel(samples, n_mels=128)
    return None
# --- Art Style 1: Spectrogram Montage ---
def create_montage(files):
    """Tile the Mel spectrograms of ``files`` into a 4x2 grid image.

    Files whose feature extraction returns ``None`` are skipped, and unused
    grid cells are filled with black tiles.

    Args:
        files: Paths of the audio files to render.

    Returns:
        The assembled RGB canvas, or ``None`` when no spectrogram could be
        produced.
    """
    print("Creating style: Spectrogram Montage")
    spectrograms = (get_feature(path, 'melspec') for path in files)
    tiles = [
        array_to_image(spec, 'magma', SPEC_SIZE)
        for spec in spectrograms
        if spec is not None
    ]
    if not tiles:
        return None

    columns, rows = 4, 2
    tile_w, tile_h = SPEC_SIZE

    # Pad with black tiles when fewer than columns * rows spectrograms exist
    shortfall = columns * rows - len(tiles)
    tiles.extend(Image.new('RGB', SPEC_SIZE, (0, 0, 0)) for _ in range(shortfall))

    canvas = Image.new('RGB', (tile_w * columns, tile_h * rows))
    for index, tile in enumerate(tiles):
        # Fill the grid row by row, left to right
        row, column = divmod(index, columns)
        canvas.paste(tile, (column * tile_w, row * tile_h))
    return canvas
# --- Art Style 2: Chromatic Flow ---
def create_chromatic_flow(files):
    """Join the chromagrams (pitch maps) of ``files`` side by side.

    Every chromagram is truncated to the length of the shortest one before
    the pieces are concatenated along the time axis. The result is colored
    with the cyclic 'hsv' colormap and rendered 400 pixels tall, one pixel
    column per chroma frame.

    Args:
        files: Paths of the audio files to render.

    Returns:
        The rendered RGB image, or ``None`` when no chromagram could be
        produced.
    """
    print("Creating style: Chromatic Flow")
    extracted = (get_feature(path, 'chroma') for path in files)
    chromagrams = [chroma for chroma in extracted if chroma is not None]
    if not chromagrams:
        return None

    # Cut everything down to the shortest chromagram so all pieces are equal
    shortest = min(chroma.shape[1] for chroma in chromagrams)
    trimmed = [chroma[:, :shortest] for chroma in chromagrams]

    # Join along the time (column) axis
    strip = np.concatenate(trimmed, axis=1)
    total_frames = strip.shape[1]
    return array_to_image(strip, 'hsv', (total_frames, 400))
# --- Art Style 3: Pixel Sort Glitch ---
def create_glitch_art(file_path):
    """Render one Mel spectrogram and apply a column-wise pixel-sort glitch.

    The spectrogram is colored with 'inferno' at 800x600. In each pixel
    column, everything from the first "dark" pixel (luminance below 50) down
    to the bottom is reordered by ascending luminance. Columns without a dark
    pixel are left untouched.

    Args:
        file_path: Path of the audio file to render.

    Returns:
        The glitched RGB image, or ``None`` when the spectrogram could not be
        computed.
    """
    print("Creating style: Pixel Sort Glitch")
    spec = get_feature(file_path, 'melspec')
    if spec is None:
        return None

    pixels = np.array(array_to_image(spec, 'inferno', (800, 600)))  # (H, W, 3)

    for x in range(pixels.shape[1]):
        column = pixels[:, x, :]
        # Luminance = 0.299*R + 0.587*G + 0.114*B
        luminance = column[:, 0] * 0.299 + column[:, 1] * 0.587 + column[:, 2] * 0.114

        dark_rows = np.where(luminance < 50)[0]
        if dark_rows.size == 0:
            # No dark pixel in this column, nothing to sort
            continue

        start_row = dark_rows[0]
        order = np.argsort(luminance[start_row:])
        # Fancy indexing copies the pixels, so writing back into the same
        # column does not read partially overwritten data.
        pixels[start_row:, x, :] = column[start_row:][order]

    return Image.fromarray(pixels)
# --- Art Style 4: Harmonic/Percussive Weave ---
def create_weave(files):
    """Lay out harmonic and percussive spectrograms in a 4x4 grid.

    Each file contributes two adjacent 250x250 tiles: its harmonic
    spectrogram colored with 'cool', followed by its percussive spectrogram
    colored with 'hot'. Files whose feature extraction returns ``None`` are
    skipped and unused cells are filled with black tiles.

    Args:
        files: Paths of the audio files to render.

    Returns:
        The assembled RGB canvas, or ``None`` when no spectrogram pair could
        be produced.
    """
    print("Creating style: Harmonic/Percussive Weave")
    tile_w, tile_h = (250, 250)
    columns, rows = 4, 4

    tiles = []
    for path in files:
        separated = get_feature(path, 'hpss')
        if separated is None:
            continue
        harmonic, percussive = separated
        tiles.append(array_to_image(harmonic, 'cool', (250, 250)))
        tiles.append(array_to_image(percussive, 'hot', (250, 250)))
    if not tiles:
        return None

    # Pad with black tiles up to a full grid
    shortfall = columns * rows - len(tiles)
    tiles.extend(Image.new('RGB', (tile_w, tile_h), (0, 0, 0)) for _ in range(shortfall))

    canvas = Image.new('RGB', (tile_w * columns, tile_h * rows))
    for index, tile in enumerate(tiles):
        # Fill the grid row by row, left to right
        row, column = divmod(index, columns)
        canvas.paste(tile, (column * tile_w, row * tile_h))
    return canvas
# --- Art Style 5: Timbre Stripes ---
def create_timbre_stripes(files):
    """Render one horizontal stripe per file from its time-averaged spectrum.

    Each file's Mel spectrogram is averaged over time, the averages are
    stacked one row per file, and the small image is scaled to 1280x720 with
    nearest-neighbour resampling so the stripes keep hard edges. Files whose
    feature extraction returns ``None`` are skipped.

    Args:
        files: Paths of the audio files to render.

    Returns:
        The rendered RGB image, or ``None`` when no spectrogram could be
        produced.
    """
    print("Creating style: Timbre Stripes")
    avg_spectra = []
    for f in files:
        spec = get_feature(f, 'melspec')
        if spec is not None:
            # Average across time (axis=1) to get one value per Mel band
            avg_spectra.append(np.mean(spec, axis=1))
    if not avg_spectra:
        return None

    # Stack the 1D spectra into a 2D array, one row per file
    data = np.stack(avg_spectra, axis=0)

    # 'blocky' is not a matplotlib colormap, so passing it as the colormap
    # name raises ValueError inside plt.get_cmap(). Colorize with a real
    # colormap at native resolution and do the hard-edged resize here.
    img = array_to_image(data, colormap_name='viridis')
    return img.resize((1280, 720), Image.Resampling.NEAREST)
# --- Main Application Logic ---
def main():
    """Command-line entry point: parse arguments, render one style, save it.

    Positional arguments are the MP3 directory and the style name; the
    optional ``-o/--output`` sets the image file name. Nothing is saved when
    no MP3 file is found or the selected generator returns no image.
    """
    parser = argparse.ArgumentParser(description="Generate generative art from MP3 files.")
    parser.add_argument(
        "input_dir",
        type=str,
        help="Directory containing your MP3 files.",
    )
    parser.add_argument(
        "style",
        type=str,
        choices=['montage', 'chromatic', 'glitch', 'weave', 'stripes'],
        help="The art style to generate.",
    )
    parser.add_argument(
        "-o", "--output",
        type=str,
        default="audio_art.png",
        help="Output image file name (e.g., art.png)",
    )
    args = parser.parse_args()

    # 1. Find files
    files = find_mp3s(args.input_dir, NUM_FILES)
    if not files:
        return

    # 2. Look up the generator for the requested style
    generators = {
        'montage': create_montage,
        'chromatic': create_chromatic_flow,
        # Glitch works on a single track, so it only gets the first file
        'glitch': lambda found: create_glitch_art(found[0]),
        'weave': create_weave,
        'stripes': create_timbre_stripes,
    }
    generate = generators.get(args.style)
    img = generate(files) if generate is not None else None

    # 3. Save the result
    if not img:
        print("Error: Could not generate image.")
        return
    img.save(args.output)
    print(f"✨ Successfully created art and saved to {args.output}")


if __name__ == "__main__":
    main()
🚀 How to Run the Application
Save the code as audio_art.py.
Open your terminal or command prompt.
Navigate to the directory where you saved audio_art.py.
Run the script by pointing it to a directory of your MP3s and choosing a style.
Let's say you have a folder of music located at C:\Users\YourName\Music (Windows) or /home/yourname/music (Linux/Mac).
Example Commands:
To create the 'montage' style:
python audio_art.py /home/yourname/music montage -o montage_art.png
To create the 'glitch' style:
python audio_art.py C:\Users\YourName\Music glitch -o my_glitch.png
To create the 'weave' style:
python audio_art.py ./my_songs weave -o weave_pattern.png
To create the 'timbre stripes' style:
python audio_art.py /path/to/my/songs stripes -o abstract_stripes.png
From <https://gemini.google.com/app/fa2795f8776f0d78> Google Gemini 2.5 Pro
Google AI Studio
Create a web application to read 8 or more arrays of "librosa" MP3 generated data to create any one of five user-selected art styles from the data. The user should be able to select an MP3 for upload and style of art, including color palette.
Provide a sixth style as a combination of cosmic ripples with neural synapse. Also play the MP3
Of course! I've added a new art style, "Synaptic Waves," which elegantly combines the radial energy of "Cosmic Ripples" with the interconnected structure of "Neural Synapse."
Create two more styles that produce "splotches" or patches of colorful blobs that fill the screen
Of course! I've added two new art styles, "Chromatic Splotches" and "Melodic Inkblots," which create vibrant, blob-like patterns that fill the canvas.
Chromatic Splotches: This style generates a dynamic array of colorful circles. The size, position, and color of each "splotch" correspond to different features of the audio, like its energy and brightness, creating a lively, dotted texture. Melodic Inkblots: This style uses a physics-based simulation to create soft, organic shapes that gently push against each other. It's like watching colorful ink spread and settle, resulting in a mesmerizing and fluid visual.
Chromatic Splotches only fill the left and right sides of the canvas. The art style should fill the screen
You've got a great eye! You're right, the "Chromatic Splotches" visualization was mapping audio features to positions in a way that didn't always guarantee a full-screen distribution. I've adjusted the logic to scatter the splotches randomly across the entire canvas, ensuring a beautiful, screen-filling effect every time. The size and color of each splotch are still driven by the music's energy and harmonic content.
Create a style of gas molecules that dance, bounce and ricochet within the canvas
Of course! I've added a new art style called "Kinetic Gas" that brings your music to life as a vibrant collection of dancing molecules.
From <https://aistudio.google.com/apps/drive/13n6SdnpdSQztqv_-IJN0pIz476nOKG5e?source=start&showAssistant=true&showPreview=true&resourceKey=> Google AI Studio






Comments
Post a Comment