Files
convertions/utils/audiototext.py
2025-01-31 20:07:02 -05:00

87 lines
3.3 KiB
Python
Executable File

import speech_recognition as sr
from pydub import AudioSegment
import os
import sys
def convert_audio_to_text(input_audio_path, output_text_path, chunk_length_ms=30000):
# Check if the input audio file exists
if not os.path.isfile(input_audio_path):
print(f"Error: The file '{input_audio_path}' does not exist.")
sys.exit(1)
# Check if the output directory is writable
output_dir = os.path.dirname(output_text_path)
if output_dir and not os.access(output_dir, os.W_OK):
print(f"Error: The output directory '{output_dir}' is not writable.")
sys.exit(1)
# Convert the audio to WAV format if needed
wav_audio_path = input_audio_path
if input_audio_path.lower().endswith('.mp3'):
try:
sound = AudioSegment.from_mp3(input_audio_path)
wav_audio_path = input_audio_path.replace('.mp3', '.wav')
sound.export(wav_audio_path, format="wav")
print(f"Converted '{input_audio_path}' to WAV format.")
except Exception as e:
print(f"Error converting MP3 to WAV: {e}")
sys.exit(1)
# Initialize the recognizer
recognizer = sr.Recognizer()
try:
# Load the full audio file using pydub
audio = AudioSegment.from_wav(wav_audio_path)
# Split audio into chunks and transcribe each chunk
num_chunks = len(audio) // chunk_length_ms + 1
full_text = ""
for i in range(num_chunks):
start_time = i * chunk_length_ms
end_time = min((i + 1) * chunk_length_ms, len(audio))
audio_chunk = audio[start_time:end_time]
chunk_path = f"temp_chunk_{i}.wav"
audio_chunk.export(chunk_path, format="wav")
with sr.AudioFile(chunk_path) as source:
audio_data = recognizer.record(source)
try:
# Transcribe the chunk
chunk_text = recognizer.recognize_google(audio_data)
full_text += f"Chunk {i + 1}:\n{chunk_text}\n\n"
except sr.UnknownValueError:
print(f"Chunk {i + 1}: Unable to recognize speech.")
except sr.RequestError as e:
print(f"Error with chunk {i + 1}: {e}")
sys.exit(1)
finally:
# Clean up the temporary chunk file
os.remove(chunk_path)
# Save the transcribed text to the output file
with open(output_text_path, 'w', encoding='utf-8') as file:
file.write(full_text)
print(f"Transcription complete. Check the '{output_text_path}' file.")
except Exception as e:
print(f"An unexpected error occurred during transcription: {e}")
sys.exit(1)
finally:
# Clean up the temporary WAV file if it was created
if wav_audio_path != input_audio_path and os.path.exists(wav_audio_path):
os.remove(wav_audio_path)
print(f"Deleted temporary file '{wav_audio_path}'.")
if __name__ == "__main__":
if len(sys.argv) != 3:
print("Usage: python audiototext.py <input_audio_path> <output_text_path>")
sys.exit(1)
input_audio_path = sys.argv[1]
output_text_path = sys.argv[2]
convert_audio_to_text(input_audio_path, output_text_path)