Music Note Images to Frequencies to Music (MP3)
This code is designed to process images of music sheets, detect musical notes from them, and then play the corresponding sounds. Here’s a step-by-step explanation of how it works:
1. Importing Libraries
import cv2
import numpy as np
import os
import simpleaudio as sa
- cv2: This is the OpenCV library used for image processing.
- numpy (np): A library for numerical operations, especially on arrays.
- os: A module that provides functions to interact with the operating system (e.g., reading files from directories).
- simpleaudio (sa): A library for playing audio in Python.
2. Function Definitions
a. read_image(image_path)
def read_image(image_path):
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
return image
- This function reads an image from the specified `image_path` and converts it to grayscale.
b. detect_staff_lines(image)
def detect_staff_lines(image):
edges = cv2.Canny(image, 50, 150, apertureSize=3)
lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
staff_lines = []
if lines is not None:
for line in lines:
x1, y1, x2, y2 = line[0]
staff_lines.append((y1 + y2) // 2)
staff_lines = sorted(list(set(staff_lines)))
return staff_lines
- Edge Detection: Uses Canny edge detection to highlight the edges in the image.
- Line Detection: Applies the Hough Line Transform to detect straight lines (staff lines in sheet music).
- Staff Lines Extraction: Calculates the average y-coordinate of each detected line, sorts, and returns the unique values.
c. identify_notes(image, staff_lines)
def identify_notes(image, staff_lines):
_, thresh = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY_INV)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
note_positions = []
for contour in contours:
x, y, w, h = cv2.boundingRect(contour)
if w > 5 and h > 5:
note_positions.append(y + h // 2)
note_positions = sorted(note_positions)
notes = []
for pos in note_positions:
for i in range(len(staff_lines) - 1):
if staff_lines[i] <= pos < staff_lines[i + 1]:
notes.append(i)
break
return notes
- Thresholding: Converts the image to a binary format (black and white), isolating the notes.
- Contour Detection: Identifies the contours of the notes.
- Note Identification: Determines the vertical position of each note relative to the staff lines and maps it to a specific note.
d. convert_notes_to_frequencies(notes)
def convert_notes_to_frequencies(notes):
note_to_freq = {
0: 440.0,
1: 493.88,
2: 523.25,
3: 587.33,
4: 659.25,
5: 698.46,
6: 783.99
}
frequencies = [note_to_freq[note] for note in notes if note in note_to_freq]
return frequencies
- Note-to-Frequency Mapping: Converts the detected note positions into corresponding sound frequencies (e.g., A4 = 440 Hz).
e. generate_tone(frequency, duration=1.0, sample_rate=44100)
def generate_tone(frequency, duration=1.0, sample_rate=44100):
t = np.linspace(0, duration, int(sample_rate * duration), False)
tone = 0.5 * np.sin(2 * np.pi * frequency * t)
return tone
- Tone Generation: Creates a sine wave tone at the specified frequency and duration.
f. play_tone(tone, sample_rate=44100)
def play_tone(tone, sample_rate=44100):
tone = np.int16(tone * 32767)
play_obj = sa.play_buffer(tone, 1, 2, sample_rate)
play_obj.wait_done()
- Playing the Tone: Converts the generated tone into an audio buffer and plays it.
3. Main Function: process_music_sheets()
def process_music_sheets():
current_dir = os.getcwd()
image_files = [file for file in os.listdir(current_dir) if file.endswith(('.png', '.jpg', '.jpeg', '.bmp'))]
for image_file in image_files:
print(f"Processing {image_file}...")
image = read_image(image_file)
staff_lines = detect_staff_lines(image)
notes = identify_notes(image, staff_lines)
frequencies = convert_notes_to_frequencies(notes)
for frequency in frequencies:
tone = generate_tone(frequency)
play_tone(tone)
- Processing Workflow:
- Gets the current directory and lists all image files.
- For each image, it reads the image, detects staff lines, identifies notes, converts notes to frequencies, generates tones, and plays them.
4. Execution Block
if __name__ == "__main__": process_music_sheets()
- Ensures the script runs the `process_music_sheets` function if it is executed directly (not imported as a module).
Summary
This script automates the process of reading sheet music images, detecting notes, converting them to their respective sound frequencies, and playing those notes. It's a basic example of how image processing and audio synthesis can be combined to interpret and play music from visual data.
CODE 1
import cv2
import numpy as np
import os
import simpleaudio as sa
# Function to read an image
def read_image(image_path):
    """Load the sheet-music image at *image_path* as a grayscale array."""
    return cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# Function to detect staff lines in the image
def detect_staff_lines(image):
    """Return the sorted, de-duplicated y-midpoints of detected staff lines."""
    edges = cv2.Canny(image, 50, 150, apertureSize=3)
    segments = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100,
                               minLineLength=100, maxLineGap=10)
    midpoints = set()
    if segments is not None:
        for segment in segments:
            _, y_start, _, y_end = segment[0]
            # Represent each (near-horizontal) segment by its vertical midpoint
            midpoints.add((y_start + y_end) // 2)
    return sorted(midpoints)
# Function to identify notes based on their position on the staff lines
def identify_notes(image, staff_lines):
    """Map each detected note blob to the index of the staff gap it sits in."""
    # Binarize (inverted) so the dark note heads become white foreground blobs
    _, thresh = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Vertical centre of every blob big enough to be a note head
    centers = sorted(
        y + h // 2
        for x, y, w, h in map(cv2.boundingRect, contours)
        if w > 5 and h > 5
    )
    notes = []
    for center in centers:
        for gap, upper in enumerate(staff_lines[:-1]):
            if upper <= center < staff_lines[gap + 1]:
                notes.append(gap)
                break
    return notes
# Function to convert note positions into frequencies
def convert_notes_to_frequencies(notes):
    """Translate staff-gap indices into pitch frequencies in Hz.

    Indices without a mapping are silently dropped.
    """
    note_to_freq = {
        0: 440.0,   # A4
        1: 493.88,  # B4
        2: 523.25,  # C5
        3: 587.33,  # D5
        4: 659.25,  # E5
        5: 698.46,  # F5
        6: 783.99,  # G5
    }
    frequencies = []
    for note in notes:
        freq = note_to_freq.get(note)
        if freq is not None:
            frequencies.append(freq)
    return frequencies
# Function to generate a tone at a given frequency
def generate_tone(frequency, duration=1.0, sample_rate=44100):
    """Synthesize a half-amplitude sine wave at *frequency* Hz."""
    sample_count = int(sample_rate * duration)
    timeline = np.linspace(0, duration, sample_count, False)
    return 0.5 * np.sin(2 * np.pi * frequency * timeline)
# Function to play the generated tone
def play_tone(tone, sample_rate=44100):
    """Scale *tone* to 16-bit PCM and play it, blocking until done."""
    pcm = np.int16(tone * 32767)
    # 1 channel, 2 bytes per sample
    sa.play_buffer(pcm, 1, 2, sample_rate).wait_done()
# Main function to process all music sheets in the same folder
def process_music_sheets():
    """Find every image in the current working directory, read it as sheet
    music, and play the detected notes one after another.
    """
    current_dir = os.getcwd()
    # Get all image files in the current directory
    image_files = [file for file in os.listdir(current_dir)
                   if file.endswith(('.png', '.jpg', '.jpeg', '.bmp'))]
    for image_file in image_files:
        print(f"Processing {image_file}...")
        image = read_image(image_file)
        # Bug fix: cv2.imread returns None for unreadable/corrupt files;
        # skip instead of crashing inside detect_staff_lines (cv2.Canny).
        if image is None:
            print(f"Warning: could not read {image_file}, skipping.")
            continue
        # Detect staff lines, map blobs to note indices, then to frequencies
        staff_lines = detect_staff_lines(image)
        notes = identify_notes(image, staff_lines)
        frequencies = convert_notes_to_frequencies(notes)
        # Generate and play a one-second tone per detected note
        for frequency in frequencies:
            tone = generate_tone(frequency)
            play_tone(tone)
# Run the process_music_sheets function
# Entry point: only executes when run as a script, not when imported.
if __name__ == "__main__":
    process_music_sheets()
CODE 2
import cv2
import numpy as np
import pandas as pd
from scipy.io.wavfile import write
import soundfile as sf
from pydub import AudioSegment
import os
# Define base frequencies for the notes in the 4th and 5th octave
# Equal-tempered pitches for octave 4 (A4 = 440 Hz reference).
base_frequencies = {
    'C': 261.63, 'D': 293.66, 'E': 329.63, 'F': 349.23, 'G': 392.00,
    'A': 440.00, 'B': 493.88
}
# Frequency multiplier for octaves
# Doubling the frequency raises the pitch by one octave.
octave_multipliers = {
    4: 1,
    5: 2
}
# 1. Image Reading and Preprocessing
def read_image(image_path):
    """Load *image_path* in grayscale and return an inverted binary image,
    or None when the file cannot be read.
    """
    print(f"Trying to read the image at: {image_path}")
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Error: Unable to read image at {image_path}. Please check the file path.")
        return None
    # Invert so dark ink becomes white foreground for contour detection
    _, img_bin = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)
    return img_bin
# 2. Detect Staff Lines
def detect_staff_lines(img_bin):
    """Return row indices whose ink count marks a staff line.

    Uses a horizontal projection profile: rows whose pixel sum exceeds 70%
    of the strongest row are treated as staff lines.
    """
    row_totals = np.sum(img_bin, axis=1)
    cutoff = max(row_totals) * 0.7  # Tune this threshold if needed
    return [row for row, total in enumerate(row_totals) if total > cutoff]
# 3. Note Recognition
def recognize_notes(img_bin, staff_lines):
    """Detect note blobs and name each one from its vertical position.

    Returns a list of (note_name, octave) tuples; empty when no staff
    lines were found.
    """
    if not staff_lines:
        return []
    notes = []
    line_spacing = np.median(np.diff(staff_lines))
    # Candidate pitch anchors: the space above each line, then the lines themselves
    note_positions = [line - line_spacing // 2 for line in staff_lines] + staff_lines
    contours, _ = cv2.findContours(img_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Ignore blobs smaller than about half a staff space (likely noise)
        if h > line_spacing // 2 and w > line_spacing // 2:
            # Snap the blob's top edge to the nearest anchor position
            offsets = np.array([abs(y - pos) for pos in note_positions])
            nearest = note_positions[np.argmin(offsets)]
            note = map_to_note_name(nearest, staff_lines, line_spacing)
            if note:
                notes.append(note)
    return notes
def map_to_note_name(y_position, staff_lines, line_spacing):
    """Name the pitch at *y_position* on a treble-clef staff.

    *staff_lines* are the staff line y-coordinates; smaller y is higher on
    the page, so after sorting, index 0 is the TOP line and index 4 the
    bottom line. Returns a (note_name, octave) tuple, or None when the
    position falls outside the mapped staff.

    Bug fix: the original indexed the bottom-to-top note names
    ['E', 'G', 'B', 'D', 'F'] with a top-to-bottom line index, which
    inverted the whole staff (and similarly for the spaces). Pitches are
    now listed in top-to-bottom order. It also raised IndexError when more
    than five staff lines were detected; those positions now return None.
    """
    lines = sorted(staff_lines)  # work on a sorted copy; don't rely on caller order
    # Treble-clef line pitches from the top line down: F5 D5 B4 G4 E4
    line_pitches = [('F', 5), ('D', 5), ('B', 4), ('G', 4), ('E', 4)]
    for i, line in enumerate(lines):
        if abs(y_position - line) < line_spacing / 2:
            # Note is on a line
            return line_pitches[i] if i < len(line_pitches) else None
    # Treble-clef space pitches from the top space down: E5 C5 A4 F4
    space_pitches = [('E', 5), ('C', 5), ('A', 4), ('F', 4)]
    for i in range(len(lines) - 1):
        if lines[i] < y_position < lines[i + 1]:
            # Note is in a space
            return space_pitches[i] if i < len(space_pitches) else None
    return None
# 4. Note-to-Frequency Conversion
def notes_to_frequencies(notes):
    """Convert (note_name, octave) tuples to frequencies in Hz.

    None entries and unknown note names are skipped.
    """
    frequencies = []
    for entry in notes:
        if entry is None:
            continue
        name, octave = entry
        base = base_frequencies.get(name)
        if base:
            frequencies.append(base * octave_multipliers[octave])
    return frequencies
# 5. Save Frequencies to CSV
def save_frequencies_to_csv(frequencies, csv_filename):
    """Write the frequency list to *csv_filename* as a one-column CSV."""
    pd.DataFrame({'Frequency': frequencies}).to_csv(csv_filename, index=False)
    print(f"Frequencies saved to {csv_filename}")
# 6. Generate Sound and Save as MP3
def generate_and_save_sound(frequencies, duration_per_note=1.0, sample_rate=44100, output_filename="output.mp3"):
    """Render one sine tone per frequency and export the result as an MP3.

    Writes a temporary WAV first, converts it with pydub (requires ffmpeg),
    then removes the temporary file. Does nothing when *frequencies* is empty.
    """
    if not frequencies:
        print(f"No frequencies found, skipping file {output_filename}")
        return
    timeline = np.linspace(0, duration_per_note, int(sample_rate * duration_per_note), False)
    tones = [np.sin(2 * np.pi * freq * timeline) for freq in frequencies]
    sound_wave = np.concatenate(tones)
    # Normalize to 16-bit range
    sound_wave = np.int16(sound_wave / np.max(np.abs(sound_wave)) * 32767)
    # Save as WAV first
    temp_wav_file = "temp_output.wav"
    write(temp_wav_file, sample_rate, sound_wave)
    # Convert WAV to MP3
    AudioSegment.from_wav(temp_wav_file).export(output_filename, format="mp3")
    # Clean up temporary WAV file
    os.remove(temp_wav_file)
    print(f"Music saved to {output_filename}")
# Main Execution
if __name__ == "__main__":
    # Set the input and output folder paths
    input_folder = r'D:/Freelance/France_image_Processing/Input'
    output_folder = r'D:/Freelance/France_image_Processing/Output'
    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)
    # Iterate through each image file in the input folder
    for filename in os.listdir(input_folder):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            image_path = os.path.join(input_folder, filename)
            base_filename = os.path.splitext(filename)[0]
            # Generate the output filenames
            csv_filename = os.path.join(output_folder, f'{base_filename}.csv')
            mp3_filename = os.path.join(output_folder, f'{base_filename}.mp3')
            # Step 1: Read the image
            img_bin = read_image(image_path)
            if img_bin is None:
                continue  # Skip this file if the image could not be read
            # Step 2: Detect staff lines
            # Bug fix: these status messages printed the literal text
            # "(unknown)" instead of the file being processed.
            staff_lines = detect_staff_lines(img_bin)
            if not staff_lines:
                print(f"Error: No staff lines detected in {filename}.")
                continue
            # Step 3: Recognize notes
            notes = recognize_notes(img_bin, staff_lines)
            if not notes:
                print(f"No notes recognized in {filename}.")
                continue
            print(f"Recognized Notes for {filename}: {notes}")
            # Step 4: Convert notes to frequencies
            frequencies = notes_to_frequencies(notes)
            print(f"Frequencies for {filename}: {frequencies}")
            # Step 5: Save frequencies to CSV
            save_frequencies_to_csv(frequencies, csv_filename)
            # Step 6: Generate and save sound as MP3
            generate_and_save_sound(frequencies, output_filename=mp3_filename)
Comments
Post a Comment