Music note images to note frequencies to playable music (MP3)

 

This code is designed to process images of music sheets, detect musical notes from them, and then play the corresponding sounds. Here’s a step-by-step explanation of how it works:

1. Importing Libraries

import cv2
import numpy as np
import os
import simpleaudio as sa
  • cv2: This is the OpenCV library used for image processing.
  • numpy (np): A library for numerical operations, especially on arrays.
  • os: A module that provides functions to interact with the operating system (e.g., reading files from directories).
  • simpleaudio (sa): A library for playing audio in Python.

2. Function Definitions

a. read_image(image_path)

def read_image(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    return image
  • This function reads an image from the specified image_path and converts it to grayscale.

b. detect_staff_lines(image)

def detect_staff_lines(image):
    edges = cv2.Canny(image, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
    staff_lines = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            staff_lines.append((y1 + y2) // 2)
    staff_lines = sorted(list(set(staff_lines)))
    return staff_lines
  • Edge Detection: Uses Canny edge detection to highlight the edges in the image.
  • Line Detection: Applies the Hough Line Transform to detect straight lines (staff lines in sheet music).
  • Staff Lines Extraction: Calculates the average y-coordinate of each detected line, sorts, and returns the unique values.

c. identify_notes(image, staff_lines)

def identify_notes(image, staff_lines):
    _, thresh = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    note_positions = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if w > 5 and h > 5:
            note_positions.append(y + h // 2)
    note_positions = sorted(note_positions)
    notes = []
    for pos in note_positions:
        for i in range(len(staff_lines) - 1):
            if staff_lines[i] <= pos < staff_lines[i + 1]:
                notes.append(i)
                break
    return notes
  • Thresholding: Converts the image to a binary format (black and white), isolating the notes.
  • Contour Detection: Identifies the contours of the notes.
  • Note Identification: Determines the vertical position of each note relative to the staff lines and maps it to a specific note.

d. convert_notes_to_frequencies(notes)

def convert_notes_to_frequencies(notes):
    note_to_freq = {
        0: 440.0, 1: 493.88, 2: 523.25, 3: 587.33,
        4: 659.25, 5: 698.46, 6: 783.99
    }
    frequencies = [note_to_freq[note] for note in notes if note in note_to_freq]
    return frequencies
  • Note-to-Frequency Mapping: Converts the detected note positions into corresponding sound frequencies (e.g., A4 = 440 Hz).

e. generate_tone(frequency, duration=1.0, sample_rate=44100)

def generate_tone(frequency, duration=1.0, sample_rate=44100):
    t = np.linspace(0, duration, int(sample_rate * duration), False)
    tone = 0.5 * np.sin(2 * np.pi * frequency * t)
    return tone
  • Tone Generation: Creates a sine wave tone at the specified frequency and duration.

f. play_tone(tone, sample_rate=44100)

def play_tone(tone, sample_rate=44100):
    tone = np.int16(tone * 32767)
    play_obj = sa.play_buffer(tone, 1, 2, sample_rate)
    play_obj.wait_done()
  • Playing the Tone: Converts the generated tone into an audio buffer and plays it.

3. Main Function: process_music_sheets()

def process_music_sheets():
    current_dir = os.getcwd()
    image_files = [file for file in os.listdir(current_dir) if file.endswith(('.png', '.jpg', '.jpeg', '.bmp'))]
    for image_file in image_files:
        print(f"Processing {image_file}...")
        image = read_image(image_file)
        staff_lines = detect_staff_lines(image)
        notes = identify_notes(image, staff_lines)
        frequencies = convert_notes_to_frequencies(notes)
        for frequency in frequencies:
            tone = generate_tone(frequency)
            play_tone(tone)
  • Processing Workflow:
    1. Gets the current directory and lists all image files.
    2. For each image, it reads the image, detects staff lines, identifies notes, converts notes to frequencies, generates tones, and plays them.

4. Execution Block

if __name__ == "__main__":
    process_music_sheets()
  • Ensures the script runs the process_music_sheets function if it is executed directly (not imported as a module).

Summary

This script automates the process of reading sheet music images, detecting notes, converting them to their respective sound frequencies, and playing those notes. It's a basic example of how image processing and audio synthesis can be combined to interpret and play music from visual data.

CODE 1

import cv2

import numpy as np
import os
import simpleaudio as sa

# Function to read an image
def read_image(image_path):
    """Load the file at image_path as a single-channel grayscale image.

    Returns the grayscale array, or None if OpenCV cannot read the file
    (the cv2.imread convention for unreadable paths).
    """
    return cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

# Function to detect staff lines in the image
def detect_staff_lines(image):
    """Return sorted, de-duplicated vertical midpoints of detected staff lines.

    Runs Canny edge detection followed by a probabilistic Hough transform,
    then keeps the average y-coordinate of each detected line segment.
    Returns an empty list when no line segments are found.
    """
    edges = cv2.Canny(image, 50, 150, apertureSize=3)
    segments = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100,
                               minLineLength=100, maxLineGap=10)
    if segments is None:
        return []
    # Midpoint of each segment's two endpoints; a set removes duplicates.
    midpoints = {(seg[0][1] + seg[0][3]) // 2 for seg in segments}
    return sorted(midpoints)

# Function to identify notes based on their position on the staff lines
def identify_notes(image, staff_lines):
    """Map each detected note blob to the index of the staff gap containing it.

    The image is binarized (ink becomes white), external contours are taken as
    note candidates, and each candidate's vertical center is matched against
    the gaps between consecutive staff lines.

    Returns a list of staff-gap indices, ordered left-to-right.
    """
    _, thresh = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Collect (x, vertical-center) pairs for blobs large enough to be notes.
    candidates = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if w > 5 and h > 5:  # filter out small noise
            candidates.append((x, y + h // 2))

    # Bug fix: sort left-to-right (by x) so the notes come back in reading /
    # playing order.  The original sorted by vertical position, which
    # scrambled the melody whenever notes had different pitches.
    candidates.sort()

    notes = []
    for _, pos in candidates:
        for i in range(len(staff_lines) - 1):
            if staff_lines[i] <= pos < staff_lines[i + 1]:
                notes.append(i)
                break
    return notes

# Function to convert note positions into frequencies
def convert_notes_to_frequencies(notes):
    """Translate staff-position indices into tone frequencies in hertz.

    Positions outside the mapped range (0-6) are silently dropped.
    """
    position_to_hz = {
        0: 440.0,   # A4
        1: 493.88,  # B4
        2: 523.25,  # C5
        3: 587.33,  # D5
        4: 659.25,  # E5
        5: 698.46,  # F5
        6: 783.99,  # G5
    }
    frequencies = []
    for position in notes:
        if position in position_to_hz:
            frequencies.append(position_to_hz[position])
    return frequencies

# Function to generate a tone at a given frequency
def generate_tone(frequency, duration=1.0, sample_rate=44100):
    """Synthesize a half-amplitude sine wave as a float array.

    frequency is in Hz, duration in seconds; the result has
    int(sample_rate * duration) samples in [-0.5, 0.5].
    """
    num_samples = int(sample_rate * duration)
    # endpoint=False keeps sample spacing exactly duration / num_samples.
    t = np.linspace(0, duration, num_samples, False)
    return 0.5 * np.sin(2 * np.pi * frequency * t)

# Function to play the generated tone
def play_tone(tone, sample_rate=44100):
    """Play a float waveform synchronously through simpleaudio.

    The samples are scaled to signed 16-bit PCM (1 channel, 2 bytes per
    sample) and the call blocks until playback finishes.
    """
    pcm = np.int16(tone * 32767)
    sa.play_buffer(pcm, 1, 2, sample_rate).wait_done()

# Main function to process all music sheets in the same folder
def process_music_sheets():
    """Find every sheet-music image in the working directory and play it.

    For each image: detect staff lines, map note blobs to staff positions,
    convert positions to frequencies, then synthesize and play each tone.
    Unreadable images are skipped with a message instead of crashing.
    """
    current_dir = os.getcwd()

    # Bug fix: compare extensions case-insensitively so files such as
    # SCAN.PNG or photo.JPG are not silently skipped.
    image_files = [file for file in os.listdir(current_dir)
                   if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp'))]

    for image_file in image_files:
        print(f"Processing {image_file}...")

        image = read_image(image_file)
        if image is None:
            # cv2.imread returns None for unreadable files; skip instead of
            # failing inside detect_staff_lines.
            print(f"Could not read {image_file}, skipping.")
            continue

        staff_lines = detect_staff_lines(image)
        notes = identify_notes(image, staff_lines)
        frequencies = convert_notes_to_frequencies(notes)

        # Generate and play tones for each note in sequence.
        for frequency in frequencies:
            tone = generate_tone(frequency)
            play_tone(tone)

# Entry point: run the full pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    process_music_sheets()


CODE 2

import cv2
import numpy as np
import pandas as pd
from scipy.io.wavfile import write
import soundfile as sf
from pydub import AudioSegment
import os

# Base frequencies (Hz) of the natural notes in the 4th octave (C4-B4).
# 5th-octave pitches are derived by doubling via octave_multipliers below.
base_frequencies = {
    'C': 261.63, 'D': 293.66, 'E': 329.63, 'F': 349.23, 'G': 392.00,
    'A': 440.00, 'B': 493.88
}

# Frequency multiplier per supported octave: one octave up doubles the pitch.
octave_multipliers = {
    4: 1,
    5: 2
}

# 1. Image Reading and Preprocessing
def read_image(image_path):
    """Load an image as grayscale and return an inverted binary version.

    Returns None (after printing an error) when the file cannot be read.
    Dark ink becomes white (255) in the result, which is what the
    histogram and contour steps downstream expect.
    """
    print(f"Trying to read the image at: {image_path}")
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        print(f"Error: Unable to read image at {image_path}. Please check the file path.")
        return None
    binary = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY_INV)[1]
    return binary

# 2. Detect Staff Lines
def detect_staff_lines(img_bin):
    """Return the center row index of each staff line in a binarized image.

    A pixel row counts as part of a staff line when its white-pixel sum
    exceeds 70% of the maximum row sum.  Bug fix: consecutive qualifying
    rows (a line thicker than one pixel) are now merged into a single
    center row; previously every row was reported as its own "line",
    which corrupted the line-spacing median and overran the 5-entry
    pitch lookup in map_to_note_name.
    """
    horizontal_hist = np.sum(img_bin, axis=1)
    if horizontal_hist.size == 0 or horizontal_hist.max() == 0:
        return []  # blank image: no staff lines
    threshold = horizontal_hist.max() * 0.7  # Tune this threshold if needed

    rows = [i for i, total in enumerate(horizontal_hist) if total > threshold]

    # Merge each run of adjacent rows into one line center.
    staff_lines = []
    run = []
    for row in rows:
        if run and row != run[-1] + 1:
            staff_lines.append(sum(run) // len(run))
            run = []
        run.append(row)
    if run:
        staff_lines.append(sum(run) // len(run))
    return staff_lines

# 3. Note Recognition
def recognize_notes(img_bin, staff_lines):
    """Detect note blobs and return a list of (note_name, octave) tuples.

    Bug fixes relative to the original:
    - blobs are matched by the vertical CENTER of their bounding box
      (y + h // 2) rather than its top edge, which biased every note upward;
    - bounding boxes are sorted left-to-right so notes come back in playing
      order instead of OpenCV's arbitrary contour order;
    - fewer than two staff lines now returns [] instead of feeding an
      empty diff into np.median.
    """
    if len(staff_lines) < 2:
        return []

    line_spacing = np.median(np.diff(staff_lines))
    # Candidate pitch anchors: the space above each line, plus the lines.
    note_positions = [line - line_spacing // 2 for line in staff_lines] + staff_lines

    contours, _ = cv2.findContours(img_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    boxes = sorted(cv2.boundingRect(c) for c in contours)  # tuples sort by x first

    notes = []
    for x, y, w, h in boxes:
        # Size filter: keep blobs at least half a line-spacing in each dimension.
        if h > line_spacing // 2 and w > line_spacing // 2:
            center_y = y + h // 2
            diffs = np.array([abs(center_y - pos) for pos in note_positions])
            closest = note_positions[int(np.argmin(diffs))]
            note = map_to_note_name(closest, staff_lines, line_spacing)
            if note:
                notes.append(note)
    return notes

def map_to_note_name(y_position, staff_lines, line_spacing):
    """Map a vertical position to a (note_name, octave) tuple, or None.

    Bug fix: the caller's staff_lines list is no longer sorted in place;
    we work on a sorted copy, so this helper has no side effects.

    NOTE(review): staff lines are indexed top-to-bottom in image
    coordinates (smallest y first), yet the lookup tables list pitches
    bottom-up (E-G-B-D-F / F-A-C-E, standard treble-clef order from the
    bottom line).  The pitch mapping is therefore likely inverted
    top-for-bottom — confirm against a known score before trusting it.
    """
    lines = sorted(staff_lines)

    # On a line: within half a line-spacing of a line's center.
    for i, line in enumerate(lines):
        if abs(y_position - line) < line_spacing / 2:
            note_name = ['E', 'G', 'B', 'D', 'F'][i]
            octave = 4 if i < 2 else 5
            return (note_name, octave)

    # In a space: strictly between two adjacent lines.
    for i in range(len(lines) - 1):
        if lines[i] < y_position < lines[i + 1]:
            note_name = ['F', 'A', 'C', 'E'][i]
            octave = 4 if i < 2 else 5
            return (note_name, octave)

    return None

# 4. Note-to-Frequency Conversion
def notes_to_frequencies(notes):
    """Convert (note_name, octave) tuples into frequencies in Hz.

    None entries and note names absent from base_frequencies are skipped.
    """
    frequencies = []
    for entry in notes:
        if entry is None:
            continue
        name, octave = entry
        hz = base_frequencies.get(name)
        if hz:
            frequencies.append(hz * octave_multipliers[octave])
    return frequencies

# 5. Save Frequencies to CSV
def save_frequencies_to_csv(frequencies, csv_filename):
    """Write the frequency list to csv_filename as a one-column CSV."""
    table = pd.DataFrame({'Frequency': frequencies})
    table.to_csv(csv_filename, index=False)
    print(f"Frequencies saved to {csv_filename}")

# 6. Generate Sound and Save as MP3
def generate_and_save_sound(frequencies, duration_per_note=1.0, sample_rate=44100, output_filename="output.mp3"):
    """Render the frequencies as consecutive sine tones and export as MP3.

    Each frequency becomes a duration_per_note-second sine tone; the
    concatenated wave is normalized to the full signed 16-bit range,
    written to a temporary WAV, then converted to MP3 with pydub
    (requires ffmpeg).  Does nothing but print a message for an empty list.

    Bug fix: the intermediate WAV uses a name derived from output_filename
    (instead of a shared, fixed "temp_output.wav"), so concurrent runs
    cannot clobber each other, and it is removed even if the MP3 export
    fails.
    """
    if not frequencies:
        print(f"No frequencies found, skipping file {output_filename}")
        return

    t = np.linspace(0, duration_per_note, int(sample_rate * duration_per_note), False)
    sound_wave = np.concatenate([np.sin(2 * np.pi * freq * t) for freq in frequencies])

    # Normalize to 16-bit range (max |sample| maps to 32767).
    sound_wave = np.int16(sound_wave / np.max(np.abs(sound_wave)) * 32767)

    temp_wav_file = f"{output_filename}.tmp.wav"
    try:
        write(temp_wav_file, sample_rate, sound_wave)
        # Convert WAV to MP3
        AudioSegment.from_wav(temp_wav_file).export(output_filename, format="mp3")
    finally:
        # Clean up the temporary WAV file even on export failure.
        if os.path.exists(temp_wav_file):
            os.remove(temp_wav_file)

    print(f"Music saved to {output_filename}")

# Main Execution: batch-convert every sheet image in input_folder to a
# frequency CSV and an MP3 in output_folder.
if __name__ == "__main__":
    # Set the input and output folder paths.
    # NOTE(review): hard-coded machine-specific paths — consider making
    # these command-line arguments.
    input_folder = r'D:/Freelance/France_image_Processing/Input'
    output_folder = r'D:/Freelance/France_image_Processing/Output'

    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Iterate through each image file in the input folder
    for filename in os.listdir(input_folder):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            image_path = os.path.join(input_folder, filename)
            base_filename = os.path.splitext(filename)[0]

            # Output filenames mirror the input image's base name.
            csv_filename = os.path.join(output_folder, f'{base_filename}.csv')
            mp3_filename = os.path.join(output_folder, f'{base_filename}.mp3')

            # Step 1: Read the image
            img_bin = read_image(image_path)
            if img_bin is None:
                continue  # Skip this file if the image could not be read

            # Step 2: Detect staff lines
            # Bug fix: the messages below printed the literal text
            # "(unknown)" instead of interpolating the current filename.
            staff_lines = detect_staff_lines(img_bin)
            if not staff_lines:
                print(f"Error: No staff lines detected in {filename}.")
                continue

            # Step 3: Recognize notes
            notes = recognize_notes(img_bin, staff_lines)
            if not notes:
                print(f"No notes recognized in {filename}.")
                continue

            print(f"Recognized Notes for {filename}: {notes}")

            # Step 4: Convert notes to frequencies
            frequencies = notes_to_frequencies(notes)
            print(f"Frequencies for {filename}: {frequencies}")

            # Step 5: Save frequencies to CSV
            save_frequencies_to_csv(frequencies, csv_filename)

            # Step 6: Generate and save sound as MP3
            generate_and_save_sound(frequencies, output_filename=mp3_filename)

Comments