From ef6496ada952a511ae5582a63626514c8465b9de Mon Sep 17 00:00:00 2001 From: klein panic Date: Fri, 6 Dec 2024 13:02:19 -0500 Subject: [PATCH] fixed this shit bruh wtf --- .gitignore | 3 + convertions.py | 48 +++++-- utils/m4atomp3.py | 45 +++++++ utils/mp3tom4a.py | 45 +++++++ utils/mp3towav.py | 46 +++++++ utils/wavtomp3.py | 46 +++++++ utils/wavtotext.py | 311 +++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 534 insertions(+), 10 deletions(-) create mode 100644 utils/m4atomp3.py create mode 100644 utils/mp3tom4a.py create mode 100644 utils/mp3towav.py create mode 100644 utils/wavtomp3.py create mode 100644 utils/wavtotext.py diff --git a/.gitignore b/.gitignore index a9b87be..5ac1bca 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ venv/ sample_tests/ backups/ + +# Ignore Vosk model directory +model diff --git a/convertions.py b/convertions.py index cc0b7cf..63ef86f 100755 --- a/convertions.py +++ b/convertions.py @@ -43,7 +43,13 @@ SCRIPT_MAP = { "texttospeech": "texttospeech.py", "videotoaudio": "videotoaudio.py", "pdftoexcel": "pdftoexcel.py", - "mergepdfs": "mergepdfs.py" + "mergepdfs": "mergepdfs.py", + "m4atomp3": "m4atomp3.py", + "mp3tom4a": "mp3tom4a.py", + "mp3towav": "mp3towav.py", + "wavtomp3": "wavtomp3.py", + "wavtotext":"wavtotext.py", + "mdtodocx":"mdtodocx.py" } # Display help for all commands @@ -74,6 +80,12 @@ Available commands: pdftoexcel docxtomd mergepdfs ... 
# ---------------------------------------------------------------------------
# convertions.py (excerpt)
#
# The command listing printed by the help text additionally names:
#   m4atomp3, mp3tom4a, mp3towav, wavtomp3, wavtotext, mdtodocx
# ---------------------------------------------------------------------------
import os
import sys

# Commands that take one output file followed by one or more input files.
_MULTI_INPUT_COMMANDS = ("jpgstopdf", "csvmerge", "mergepdfs")


def _can_write_output(output_path):
    """Return True when *output_path* can be overwritten or created.

    An existing file is checked directly; otherwise the parent directory
    (or the current directory for a bare file name) must be writable.
    """
    if os.path.isfile(output_path):
        return os.access(output_path, os.W_OK)
    return os.access(os.path.dirname(output_path) or '.', os.W_OK)


def validate_paths(command, args):
    """Validate the path arguments of *command* and return the output path.

    Multi-input commands (``jpgstopdf``, ``csvmerge``, ``mergepdfs``) expect
    ``args`` to be ``[output, input, input, ...]``; every other command
    expects ``[input, output, ...]``.

    ``expand_path`` is defined elsewhere in convertions.py and is applied to
    every path before it is checked.

    Args:
        command: Name of the conversion command being run.
        args: Raw command-line arguments that follow the command name.

    Returns:
        The expanded output path.

    Raises:
        ValueError: If fewer than two arguments are given, or the output
            location is not writable.
        FileNotFoundError: If any input path is not an existing file.
    """
    multi_input = command in _MULTI_INPUT_COMMANDS

    if len(args) < 2:
        if multi_input:
            raise ValueError(f"Command '{command}' requires at least one output file and one or more input files.")
        raise ValueError(f"Command '{command}' requires an input file and an output file.")

    if multi_input:
        output_path = expand_path(args[0])
        input_paths = [expand_path(path) for path in args[1:]]

        if not _can_write_output(output_path):
            raise ValueError(f"Output file '{output_path}' is not writable.")

        # Collect the missing inputs once so the message can name them.
        missing_files = [path for path in input_paths if not os.path.isfile(path)]
        if missing_files:
            raise FileNotFoundError(f"One or more input files do not exist: {', '.join(missing_files)}")

        return output_path

    input_path, output_path = expand_path(args[0]), expand_path(args[1])

    if not os.path.isfile(input_path):
        raise FileNotFoundError(f"Input file '{input_path}' does not exist.")

    if not _can_write_output(output_path):
        raise ValueError(f"Output directory for '{output_path}' is not writable.")

    return output_path


# (convertions.py continues with run_command(command, args), unchanged.)


# ---------------------------------------------------------------------------
# utils/m4atomp3.py (new file)
# ---------------------------------------------------------------------------
import os
import sys


def m4a_to_mp3(input_file, output_file):
    """Convert the M4A file *input_file* into an MP3 at *output_file*.

    Raises:
        ValueError: If *input_file* does not have a ``.m4a`` extension.
        FileNotFoundError: If *input_file* is not an existing file.
    """
    if not input_file.lower().endswith('.m4a'):
        raise ValueError("Input file must be an M4A file.")
    if not os.path.isfile(input_file):
        raise FileNotFoundError(f"Input file {input_file} does not exist.")

    # Deferred import: pydub is only needed once the arguments are valid.
    from pydub import AudioSegment

    audio = AudioSegment.from_file(input_file, format="m4a")
    audio.export(output_file, format="mp3")
    print(f"Converted {input_file} to {output_file}.")


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python m4atomp3.py INPUT.m4a OUTPUT.mp3")
        sys.exit(2)

    input_file, output_file = sys.argv[1], sys.argv[2]
    if not output_file.lower().endswith('.mp3'):
        print("Output file must have a .mp3 extension.")
        sys.exit(2)

    try:
        m4a_to_mp3(input_file, output_file)
    except (ValueError, FileNotFoundError) as exc:
        # Report the problem and signal failure to the calling process.
        print(exc)
        sys.exit(1)


# ---------------------------------------------------------------------------
# utils/mp3tom4a.py (new file)
# ---------------------------------------------------------------------------
import os
import sys


def mp3_to_m4a(input_file, output_file):
    """Convert the MP3 file *input_file* into an M4A at *output_file*.

    Raises:
        ValueError: If *input_file* does not have a ``.mp3`` extension.
        FileNotFoundError: If *input_file* is not an existing file.
    """
    if not input_file.lower().endswith('.mp3'):
        raise ValueError("Input file must be an MP3 file.")
    if not os.path.isfile(input_file):
        raise FileNotFoundError(f"Input file {input_file} does not exist.")

    # Deferred import: pydub is only needed once the arguments are valid.
    from pydub import AudioSegment

    audio = AudioSegment.from_file(input_file, format="mp3")
    audio.export(output_file, format="mp4")
    print(f"Converted {input_file} to {output_file}.")


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python mp3tom4a.py INPUT.mp3 OUTPUT.m4a")
        sys.exit(2)

    input_file, output_file = sys.argv[1], sys.argv[2]
    if not output_file.lower().endswith('.m4a'):
        print("Output file must have a .m4a extension.")
        sys.exit(2)

    try:
        mp3_to_m4a(input_file, output_file)
    except (ValueError, FileNotFoundError) as exc:
        # Report the problem and signal failure to the calling process.
        print(exc)
        sys.exit(1)


# ---------------------------------------------------------------------------
# utils/mp3towav.py (new file) follows.
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# utils/mp3towav.py (new file)
# ---------------------------------------------------------------------------
import os
import sys


def mp3_to_wav(input_file, output_file):
    """Convert the MP3 file *input_file* into a WAV at *output_file*.

    Raises:
        ValueError: If *input_file* does not have a ``.mp3`` extension.
        FileNotFoundError: If *input_file* is not an existing file.
    """
    if not input_file.lower().endswith('.mp3'):
        raise ValueError("Input file must be an MP3 file.")
    if not os.path.isfile(input_file):
        raise FileNotFoundError(f"Input file {input_file} does not exist.")

    # Deferred import: pydub is only needed once the arguments are valid.
    from pydub import AudioSegment

    audio = AudioSegment.from_file(input_file, format="mp3")
    audio.export(output_file, format="wav")
    print(f"Converted {input_file} to {output_file}.")


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python mp3towav.py INPUT.mp3 OUTPUT.wav")
        sys.exit(2)

    input_file, output_file = sys.argv[1], sys.argv[2]
    if not output_file.lower().endswith('.wav'):
        print("Output file must have a .wav extension.")
        sys.exit(2)

    try:
        mp3_to_wav(input_file, output_file)
    except (ValueError, FileNotFoundError) as exc:
        # Report the problem and signal failure to the calling process.
        print(exc)
        sys.exit(1)


# ---------------------------------------------------------------------------
# utils/wavtomp3.py (new file)
# ---------------------------------------------------------------------------
import os
import sys


def wav_to_mp3(input_file, output_file):
    """Convert the WAV file *input_file* into an MP3 at *output_file*.

    Raises:
        ValueError: If *input_file* does not have a ``.wav`` extension.
        FileNotFoundError: If *input_file* is not an existing file.
    """
    if not input_file.lower().endswith('.wav'):
        raise ValueError("Input file must be a WAV file.")
    if not os.path.isfile(input_file):
        raise FileNotFoundError(f"Input file {input_file} does not exist.")

    # Deferred import: pydub is only needed once the arguments are valid.
    from pydub import AudioSegment

    audio = AudioSegment.from_file(input_file, format="wav")
    audio.export(output_file, format="mp3")
    print(f"Converted {input_file} to {output_file}.")


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python wavtomp3.py INPUT.wav OUTPUT.mp3")
        sys.exit(2)

    input_file, output_file = sys.argv[1], sys.argv[2]
    if not output_file.lower().endswith('.mp3'):
        print("Output file must have a .mp3 extension.")
        sys.exit(2)

    try:
        wav_to_mp3(input_file, output_file)
    except (ValueError, FileNotFoundError) as exc:
        # Report the problem and signal failure to the calling process.
        print(exc)
        sys.exit(1)


# ---------------------------------------------------------------------------
# utils/wavtotext.py (new file) starts here. Its first statement is
#     import speech_recognition as sr
# and the rest of that module follows.
# ---------------------------------------------------------------------------
import contextlib
import json
import os
import shutil
import subprocess
import sys
import tempfile
import time
import zipfile
from pathlib import Path

try:
    from vosk import Model, KaldiRecognizer
except ImportError:
    vosk_installed = False
else:
    vosk_installed = True

VOSK_MODEL_URL = "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"
VOSK_MODEL_DIR = os.path.abspath("model")

# Files whose presence is taken as evidence of a usable model directory.
REQUIRED_MODEL_FILES = ("conf/model.conf", "am/final.mdl")


def _has_model_files(directory):
    """Return True when every required model file exists under *directory*."""
    return all(Path(directory, rel).exists() for rel in REQUIRED_MODEL_FILES)


def _hoist_nested_model(model_dir):
    """Move a model wrapped in a sub-folder up into *model_dir* itself.

    Does nothing when the required files are already at the top level or
    when no direct sub-folder contains them.
    """
    root = Path(model_dir)
    if _has_model_files(root):
        return
    for child in sorted(root.iterdir()):
        if child.is_dir() and _has_model_files(child):
            for entry in list(child.iterdir()):
                shutil.move(str(entry), str(root / entry.name))
            child.rmdir()
            return


def _ignore_model_dir(gitignore_path=".gitignore"):
    """Append the model folder to *gitignore_path* unless already listed."""
    name = Path(VOSK_MODEL_DIR).name
    try:
        with open(gitignore_path, encoding="utf-8") as existing:
            known = {line.strip().rstrip("/") for line in existing}
    except FileNotFoundError:
        known = set()
    if name in known:
        return
    with open(gitignore_path, "a", encoding="utf-8") as gitignore:
        gitignore.write(f"\n# Ignore Vosk model directory\n{name}/\n")


def download_and_setup_vosk():
    """Download the Vosk model archive and unpack it into VOSK_MODEL_DIR.

    The archive is fetched into a temporary directory, extracted into
    VOSK_MODEL_DIR, flattened if the model sits inside a wrapper folder,
    verified, and finally the model folder is added to ``.gitignore``.

    Raises:
        OSError: On download or file-system failures (``requests`` errors
            derive from it), or when the extracted model is incomplete.
        zipfile.BadZipFile: If the downloaded archive is not a valid zip.
    """
    # Deferred import: only needed when the model is actually fetched.
    import requests

    print("Downloading Vosk model... This may take a few minutes.")
    with tempfile.TemporaryDirectory() as workdir:
        zip_path = os.path.join(workdir, "vosk_model.zip")

        with requests.get(VOSK_MODEL_URL, stream=True, timeout=60) as response:
            # Fail early instead of saving an HTML error page as the zip.
            response.raise_for_status()
            with open(zip_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)

        print("Extracting Vosk model...")
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(VOSK_MODEL_DIR)

    # The archive may wrap the model in a single top-level folder; the
    # verification below expects the files directly under VOSK_MODEL_DIR.
    _hoist_nested_model(VOSK_MODEL_DIR)
    print(f"Vosk model downloaded and set up in '{VOSK_MODEL_DIR}'.")

    # Verify that the model directory contains necessary files
    verify_model_files()

    # Add model directory to .gitignore
    _ignore_model_dir()


def verify_model_files(model_dir=None):
    """Check that the required model files exist.

    Args:
        model_dir: Directory to inspect; defaults to VOSK_MODEL_DIR.

    Raises:
        FileNotFoundError: If any required file is missing. The message
            lists the missing relative paths.
    """
    root = Path(VOSK_MODEL_DIR if model_dir is None else model_dir)
    missing_files = [rel for rel in REQUIRED_MODEL_FILES if not (root / rel).exists()]
    if missing_files:
        raise FileNotFoundError(
            f"Model file(s) missing: {', '.join(missing_files)}. Re-download the Vosk model manually."
        )


def convert_to_vosk_compatible_wav(input_file, output_file=None):
    """Re-encode *input_file* as 16 kHz mono 16-bit PCM WAV using ffmpeg.

    Args:
        input_file: Path of the audio file to convert.
        output_file: Destination path. When omitted, a temporary ``.wav``
            file is created and the caller is responsible for deleting it.

    Returns:
        The path of the converted file, or None if the conversion failed.
    """
    made_temp = output_file is None
    if made_temp:
        handle, output_file = tempfile.mkstemp(prefix="vosk_", suffix=".wav")
        os.close(handle)

    command = [
        # -y: overwrite the destination instead of prompting interactively.
        "ffmpeg", "-y", "-i", input_file,
        "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le", output_file,
    ]
    try:
        print(f"Converting {input_file} to Vosk-compatible format...")
        subprocess.run(command, check=True)
    except FileNotFoundError:
        print("Error converting file: ffmpeg executable not found.")
    except subprocess.CalledProcessError as e:
        print(f"Error converting file: {e}")
    else:
        print(f"Converted file saved as {output_file}.")
        return output_file

    if made_temp:
        # Do not leave an empty placeholder behind after a failure.
        with contextlib.suppress(OSError):
            os.remove(output_file)
    return None


def online_wav_to_text(input_file, max_attempts=3):
    """Transcribe *input_file* with the Google Web Speech API.

    Request errors are retried with exponential back-off.

    Args:
        input_file: Path of the WAV file to transcribe.
        max_attempts: Number of API attempts before giving up.

    Returns:
        The recognized text, or None when recognition was not possible.
    """
    try:
        import speech_recognition as sr
    except ImportError:
        print("Online recognition requires the 'SpeechRecognition' package.")
        return None

    recognizer = sr.Recognizer()
    with sr.AudioFile(input_file) as source:
        print("Processing audio for online recognition...")
        audio_data = recognizer.record(source)

    for attempt in range(max_attempts):
        try:
            return recognizer.recognize_google(audio_data)
        except sr.RequestError as e:
            print(f"API error on attempt {attempt + 1}: {e}")
            if attempt + 1 < max_attempts:
                time.sleep(2 ** attempt)  # Exponential backoff
        except sr.UnknownValueError:
            print("Speech recognition could not understand the audio.")
            return None
    return None


def combine_vosk_results(raw_results):
    """Join the ``text`` fields of Vosk JSON result strings with spaces.

    Empty or missing ``text`` values are skipped so that silent segments do
    not introduce doubled spaces.
    """
    texts = (json.loads(raw).get("text", "") for raw in raw_results)
    return " ".join(text for text in texts if text)


def offline_wav_to_text(input_file):
    """Transcribe *input_file* locally with Vosk.

    Offers to download the model when VOSK_MODEL_DIR does not exist.

    Returns:
        The recognized text (possibly empty), or None when the package or
        model is unavailable or the audio format is unsupported.
    """
    import wave  # Ensure wave is imported before using offline recognition

    if not vosk_installed:
        print("Offline recognition requires the 'vosk' package, which is not installed.")
        return None

    if not Path(VOSK_MODEL_DIR).exists():
        print("Offline model not found. Would you like to set it up now? [y/N]")
        try:
            choice = input().strip().lower()
        except EOFError:
            # No interactive stdin: treat it as the default answer "N".
            choice = ""
        if choice != 'y':
            print("Skipping offline setup. Exiting.")
            return None
        try:
            download_and_setup_vosk()
        except (OSError, zipfile.BadZipFile) as e:
            print(f"Model setup failed: {e}")
            return None

    try:
        verify_model_files()
    except FileNotFoundError as e:
        print(e)
        return None

    with wave.open(input_file, "rb") as wf:
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getframerate() not in [8000, 16000]:
            print("Audio file must be WAV format mono PCM.")
            return None

        model = Model(VOSK_MODEL_DIR)
        recognizer = KaldiRecognizer(model, wf.getframerate())

        print("Processing audio for offline recognition...")
        results = []
        while True:
            data = wf.readframes(4000)
            if not data:
                break
            if recognizer.AcceptWaveform(data):
                results.append(recognizer.Result())
        # Audio still buffered after the last chunk is only returned here.
        results.append(recognizer.FinalResult())

    # Combine results into a single text
    return combine_vosk_results(results)


def wav_to_text(input_file, output_file):
    """Transcribe the WAV file *input_file* and write the text to *output_file*.

    The audio is first re-encoded for recognition, then transcribed online,
    falling back to offline recognition when the online attempt yields
    nothing. The intermediate converted file is always removed.

    Returns:
        True when a non-empty transcription was written, False otherwise.

    Raises:
        ValueError: If *input_file* does not have a ``.wav`` extension.
    """
    if not input_file.lower().endswith('.wav'):
        raise ValueError("Input file must be a WAV file.")

    # Check and convert file format if necessary
    converted_file = convert_to_vosk_compatible_wav(input_file)
    if not converted_file:
        print("File conversion failed. Unable to proceed.")
        return False

    try:
        text = online_wav_to_text(converted_file)
        if text is None:  # Fallback to offline if online fails
            print("Online recognition failed. Switching to offline recognition...")
            text = offline_wav_to_text(converted_file)
    finally:
        with contextlib.suppress(OSError):
            os.remove(converted_file)

    if text:
        with open(output_file, 'w', encoding="utf-8") as f:
            f.write(text)
        print(f"Transcription completed successfully. Output saved to '{output_file}'.")
        return True

    print("Transcription failed. Please check the error message above.")
    return False


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success, 2 for usage errors, 1 for any other failure.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 3:
        print("Usage: python wavtotext.py INPUT.wav OUTPUT.txt")
        return 2

    input_file, output_file = argv[1], argv[2]
    if not output_file.lower().endswith('.txt'):
        print("Output file must have a .txt extension.")
        return 2
    if not os.path.exists(input_file):
        print(f"Input file {input_file} does not exist.")
        return 1

    try:
        success = wav_to_text(input_file, output_file)
    except ValueError as exc:
        print(exc)
        return 1
    return 0 if success else 1  # Non-zero status code on failure


if __name__ == "__main__":
    sys.exit(main())