Add audio conversion and speech-to-text commands; validate output paths

2024-12-06 13:02:19 -05:00
parent c1372606a6
commit ef6496ada9
7 changed files with 534 additions and 10 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
 venv/
 sample_tests/
 backups/
+
+# Ignore Vosk model directory
+model
--- a/convertions.py
+++ b/convertions.py
@@ -43,7 +43,13 @@ SCRIPT_MAP = {
    "texttospeech": "texttospeech.py",
    "videotoaudio": "videotoaudio.py",
    "pdftoexcel": "pdftoexcel.py",
-    "mergepdfs": "mergepdfs.py"
+    "mergepdfs": "mergepdfs.py",
+    "m4atomp3": "m4atomp3.py",
+    "mp3tom4a": "mp3tom4a.py",
+    "mp3towav": "mp3towav.py",
+    "wavtomp3": "wavtomp3.py",
+    "wavtotext":"wavtotext.py",
+    "mdtodocx":"mdtodocx.py"
 }

 # Display help for all commands
@@ -74,6 +80,12 @@ Available commands:
  pdftoexcel <input_pdf_path> <output_excel_path>
  docxtomd <input_docx_path> <output_md_path>
  mergepdfs <output_pdf_path> <input_pdf1> <input_pdf2> ...
+  m4atomp3 <input_m4a_path> <output_mp3_path>
+  mp3tom4a <input_mp3_path> <output_m4a_path>
+  mp3towav <input_mp3_path> <output_wav_path>
+  wavtomp3 <input_wav_path> <output_mp3_path>
+  wavtotext <input_wav_path> <output_text_path>
+  mdtodocx <input_md_path> <output_docx_path>
 """
    print(help_text)

@@ -93,19 +105,35 @@ def get_script_path(script_name):

 def validate_paths(command, args):
    """Validate paths for commands with specific argument requirements."""
+    # Commands that require multiple input files and one output file
    if command in ["jpgstopdf", "csvmerge", "mergepdfs"]:
+        if len(args) < 2:
+            raise ValueError(f"Command '{command}' requires at least one output file and one or more input files.")
+        # For these commands the first argument is the output path and the rest are inputs.
        output_path = expand_path(args[0])
        input_paths = [expand_path(path) for path in args[1:]]
-        if len(input_paths) < 2:
-            raise ValueError(f"Command '{command}' requires at least two input files.")
+        # An existing output file passes as-is; otherwise its directory ('.' if none) must be writable.
+        if not os.path.isfile(output_path) and not os.access(os.path.dirname(output_path) or '.', os.W_OK):
+            raise ValueError(f"Output file '{output_path}' is not writable.")
+        # Name every missing input in the error instead of reporting a generic failure.
        if not all(os.path.isfile(path) for path in input_paths):
-            raise FileNotFoundError("One or more input paths do not exist.")
-    else:
-        if len(args) < 2:
-            raise ValueError(f"Command '{command}' requires an input and an output path.")
-        input_path, output_path = expand_path(args[0]), expand_path(args[1])
-        if not os.path.isfile(input_path):
-            raise FileNotFoundError(f"Input file '{input_path}' does not exist.")
+            missing_files = [path for path in input_paths if not os.path.isfile(path)]
+            raise FileNotFoundError(f"One or more input files do not exist: {', '.join(missing_files)}")
+        
+        return output_path
+    
+    # Commands that require exactly one input file and one output file
+    if len(args) < 2:
+        raise ValueError(f"Command '{command}' requires an input file and an output file.")
+    # Only the first two arguments are read here; any extras are not checked.
+    input_path, output_path = expand_path(args[0]), expand_path(args[1])
+    
+    if not os.path.isfile(input_path):
+        raise FileNotFoundError(f"Input file '{input_path}' does not exist.")
+    # A bare filename has an empty dirname, so '.' (the current directory) is checked instead.
+    if not os.access(os.path.dirname(output_path) or '.', os.W_OK):
+        raise ValueError(f"Output directory for '{output_path}' is not writable.")
+    
    return output_path

 def run_command(command, args):
--- a/utils/m4atomp3.py
+++ b/utils/m4atomp3.py
@@ -0,0 +1,45 @@
+from pydub import AudioSegment
+
+import sys
+
+import os
+
+
+
+def m4a_to_mp3(input_file, output_file):
+    """Convert the M4A file at input_file into an MP3 written to output_file.
+
+    Raises ValueError when input_file does not end in ".m4a" (case-insensitive).
+    """
+    has_m4a_suffix = input_file.lower().endswith('.m4a')
+    if not has_m4a_suffix:
+        raise ValueError("Input file must be an M4A file.")
+    segment = AudioSegment.from_file(input_file, format="m4a")
+    segment.export(output_file, format="mp3")
+    print(f"Converted {input_file} to {output_file}.")
+
+
+
+if __name__ == "__main__":
+    # Command-line entry point: m4atomp3.py <input_file.m4a> <output_file.mp3>
+    #
+    # Every rejected invocation exits with status 1 so that a calling
+    # process can tell a failed run apart from a successful conversion.
+    if len(sys.argv) != 3:
+        print("Usage: python m4atomp3.py <input_file.m4a> <output_file.mp3>")
+        sys.exit(1)
+
+    input_file = sys.argv[1]
+    output_file = sys.argv[2]
+
+    # Validate the output extension first, then that the input exists.
+    if not output_file.lower().endswith('.mp3'):
+        print("Output file must have a .mp3 extension.")
+        sys.exit(1)
+
+    if not os.path.exists(input_file):
+        print(f"Input file {input_file} does not exist.")
+        sys.exit(1)
+
+    # The conversion itself raises ValueError if the input is not .m4a.
+    m4a_to_mp3(input_file, output_file)
--- a/utils/mp3tom4a.py
+++ b/utils/mp3tom4a.py
@@ -0,0 +1,45 @@
+from pydub import AudioSegment
+
+import sys
+
+import os
+
+
+
+def mp3_to_m4a(input_file, output_file):
+    """Convert the MP3 file at input_file into an M4A written to output_file.
+
+    Raises ValueError when input_file does not end in ".mp3" (case-insensitive).
+    """
+    has_mp3_suffix = input_file.lower().endswith('.mp3')
+    if not has_mp3_suffix:
+        raise ValueError("Input file must be an MP3 file.")
+    segment = AudioSegment.from_file(input_file, format="mp3")
+    segment.export(output_file, format="mp4")  # exported through the "mp4" format name
+    print(f"Converted {input_file} to {output_file}.")
+
+
+
+if __name__ == "__main__":
+    # Command-line entry point: mp3tom4a.py <input_file.mp3> <output_file.m4a>
+    #
+    # Every rejected invocation exits with status 1 so that a calling
+    # process can tell a failed run apart from a successful conversion.
+    if len(sys.argv) != 3:
+        print("Usage: python mp3tom4a.py <input_file.mp3> <output_file.m4a>")
+        sys.exit(1)
+
+    input_file = sys.argv[1]
+    output_file = sys.argv[2]
+
+    # Validate the output extension first, then that the input exists.
+    if not output_file.lower().endswith('.m4a'):
+        print("Output file must have a .m4a extension.")
+        sys.exit(1)
+
+    if not os.path.exists(input_file):
+        print(f"Input file {input_file} does not exist.")
+        sys.exit(1)
+
+    # The conversion itself raises ValueError if the input is not .mp3.
+    mp3_to_m4a(input_file, output_file)
--- a/utils/mp3towav.py
+++ b/utils/mp3towav.py
@@ -0,0 +1,46 @@
+from pydub import AudioSegment
+
+import sys
+
+import os
+
+
+
+def mp3_to_wav(input_file, output_file):
+    """Convert the MP3 file at input_file into a WAV written to output_file.
+
+    Raises ValueError when input_file does not end in ".mp3" (case-insensitive).
+    """
+    has_mp3_suffix = input_file.lower().endswith('.mp3')
+    if not has_mp3_suffix:
+        raise ValueError("Input file must be an MP3 file.")
+    segment = AudioSegment.from_file(input_file, format="mp3")
+    segment.export(output_file, format="wav")
+    print(f"Converted {input_file} to {output_file}.")
+
+
+
+if __name__ == "__main__":
+    # Command-line entry point: mp3towav.py <input_file.mp3> <output_file.wav>
+    #
+    # Every rejected invocation exits with status 1 so that a calling
+    # process can tell a failed run apart from a successful conversion.
+    if len(sys.argv) != 3:
+        print("Usage: python mp3towav.py <input_file.mp3> <output_file.wav>")
+        sys.exit(1)
+
+    input_file = sys.argv[1]
+    output_file = sys.argv[2]
+
+    # Validate the output extension first, then that the input exists.
+    if not output_file.lower().endswith('.wav'):
+        print("Output file must have a .wav extension.")
+        sys.exit(1)
+
+    if not os.path.exists(input_file):
+        print(f"Input file {input_file} does not exist.")
+        sys.exit(1)
+
+    # The conversion itself raises ValueError if the input is not .mp3.
+    mp3_to_wav(input_file, output_file)
+
--- a/utils/wavtomp3.py
+++ b/utils/wavtomp3.py
@@ -0,0 +1,46 @@
+from pydub import AudioSegment
+
+import sys
+
+import os
+
+
+
+def wav_to_mp3(input_file, output_file):
+    """Convert the WAV file at input_file into an MP3 written to output_file.
+
+    Raises ValueError when input_file does not end in ".wav" (case-insensitive).
+    """
+    has_wav_suffix = input_file.lower().endswith('.wav')
+    if not has_wav_suffix:
+        raise ValueError("Input file must be a WAV file.")
+    segment = AudioSegment.from_file(input_file, format="wav")
+    segment.export(output_file, format="mp3")
+    print(f"Converted {input_file} to {output_file}.")
+
+
+
+if __name__ == "__main__":
+    # Command-line entry point: wavtomp3.py <input_file.wav> <output_file.mp3>
+    #
+    # Every rejected invocation exits with status 1 so that a calling
+    # process can tell a failed run apart from a successful conversion.
+    if len(sys.argv) != 3:
+        print("Usage: python wavtomp3.py <input_file.wav> <output_file.mp3>")
+        sys.exit(1)
+
+    input_file = sys.argv[1]
+    output_file = sys.argv[2]
+
+    # Validate the output extension first, then that the input exists.
+    if not output_file.lower().endswith('.mp3'):
+        print("Output file must have a .mp3 extension.")
+        sys.exit(1)
+
+    if not os.path.exists(input_file):
+        print(f"Input file {input_file} does not exist.")
+        sys.exit(1)
+
+    # The conversion itself raises ValueError if the input is not .wav.
+    wav_to_mp3(input_file, output_file)
+
--- a/utils/wavtotext.py
+++ b/utils/wavtotext.py
@@ -0,0 +1,311 @@
+import speech_recognition as sr
+
+import sys
+
+import os
+
+import time
+
+import subprocess
+
+import requests
+
+import zipfile
+
+from pathlib import Path
+
+
+
+# Vosk is an optional dependency. Record whether it could be imported
+# rather than letting a missing package abort the import of this module.
+vosk_installed = True
+try:
+    from vosk import Model, KaldiRecognizer
+except ImportError:
+    vosk_installed = False
+
+
+# Zip archive fetched by download_and_setup_vosk().
+VOSK_MODEL_URL = "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"
+
+# Model directory: "model" resolved against the working directory at import time.
+VOSK_MODEL_DIR = os.path.abspath("model")
+
+
+
+def download_and_setup_vosk():
+    """Download the Vosk model archive and unpack it into VOSK_MODEL_DIR.
+
+    Raises requests.HTTPError on a failed download, and whatever
+    verify_model_files() raises when the unpacked model is incomplete.
+    """
+    import shutil  # local import: only this function needs it
+
+    zip_path = "vosk_model.zip"
+    print("Downloading Vosk model... This may take a few minutes.")
+    try:
+        with requests.get(VOSK_MODEL_URL, stream=True, timeout=60) as response:
+            # Fail now rather than saving an HTTP error page as the archive.
+            response.raise_for_status()
+            with open(zip_path, "wb") as file:
+                for chunk in response.iter_content(chunk_size=8192):
+                    file.write(chunk)
+
+        print("Extracting Vosk model...")
+        with zipfile.ZipFile(zip_path, "r") as zip_ref:
+            zip_ref.extractall(VOSK_MODEL_DIR)
+    finally:
+        # Never leave the (possibly partial) archive behind.
+        if os.path.exists(zip_path):
+            os.remove(zip_path)
+
+    # An archive that wraps everything in one top-level folder would leave the
+    # model one level too deep for verify_model_files(); hoist its contents.
+    entries = os.listdir(VOSK_MODEL_DIR)
+    nested = os.path.join(VOSK_MODEL_DIR, entries[0]) if len(entries) == 1 else None
+    if nested and os.path.isdir(nested):
+        for name in os.listdir(nested):
+            shutil.move(os.path.join(nested, name), VOSK_MODEL_DIR)
+        os.rmdir(nested)
+
+    print(f"Vosk model downloaded and set up in '{VOSK_MODEL_DIR}'.")
+
+    # Verify that the model directory contains necessary files
+    verify_model_files()
+
+    # Git matches ignore patterns relative to the .gitignore file, so record a
+    # relative path, with real newlines, and only if it is not listed already.
+    pattern = os.path.relpath(VOSK_MODEL_DIR)
+    gitignore = Path(".gitignore")
+    listed = gitignore.read_text().splitlines() if gitignore.exists() else []
+    if pattern not in listed:
+        with gitignore.open("a") as handle:
+            handle.write(f"\n# Ignore Vosk model directory\n{pattern}\n")
+
+
+
+def verify_model_files():
+    """Raise an Exception naming any required file absent from the model directory."""
+    model_root = Path(VOSK_MODEL_DIR)
+    absent = [rel for rel in ("conf/model.conf", "am/final.mdl") if not (model_root / rel).exists()]
+    if absent:
+        raise Exception(f"Model file(s) missing: {', '.join(absent)}. Re-download the Vosk model manually.")
+
+def convert_to_vosk_compatible_wav(input_file):
+    """Re-encode input_file as 16 kHz mono 16-bit PCM WAV using ffmpeg.
+
+    Returns the path of the converted file ("converted.wav" in the current
+    directory), or None when ffmpeg is missing or the conversion fails.
+    """
+    output_file = "converted.wav"
+    command = [
+        "ffmpeg",
+        "-y",  # overwrite a converted.wav left by an earlier run instead of prompting
+        "-i", input_file,
+        "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
+        output_file,
+    ]
+
+    print(f"Converting {input_file} to Vosk-compatible format...")
+    try:
+        subprocess.run(command, check=True)
+    except (FileNotFoundError, subprocess.CalledProcessError) as e:
+        # FileNotFoundError: the ffmpeg executable could not be found.
+        # CalledProcessError: ffmpeg ran but exited with a non-zero status.
+        print(f"Error converting file: {e}")
+        return None
+
+    print(f"Converted file saved as {output_file}.")
+    return output_file
+
+
+
+def online_wav_to_text(input_file):
+    """Transcribe input_file through recognize_google, retrying on API errors.
+
+    Returns the recognized text, or None when the audio cannot be understood
+    or all three attempts end in a request error.
+    """
+    max_attempts = 3
+    engine = sr.Recognizer()
+
+    with sr.AudioFile(input_file) as source:
+        print("Processing audio for online recognition...")
+        recording = engine.record(source)
+
+        attempt = 0
+        while attempt < max_attempts:
+            try:
+                return engine.recognize_google(recording)
+            except sr.RequestError as e:
+                print(f"API error on attempt {attempt + 1}: {e}")
+                # Exponential backoff: wait 1, 2, then 4 seconds.
+                time.sleep(2 ** attempt)
+            except sr.UnknownValueError:
+                print("Speech recognition could not understand the audio.")
+                return None
+            attempt += 1
+
+    return None
+
+
+
+def offline_wav_to_text(input_file):
+    """Transcribe a WAV file locally with Vosk.
+
+    Offers to download the model when VOSK_MODEL_DIR does not exist. Returns
+    the recognized text (possibly empty), or None when Vosk is not
+    installed, the model is unavailable, or the audio is not mono 16-bit
+    PCM sampled at 8 or 16 kHz.
+    """
+    import json  # local imports: only the offline path needs them
+    import wave
+
+    if not vosk_installed:
+        # Model/KaldiRecognizer are undefined in this case; report it instead
+        # of failing with a NameError further down.
+        print("Offline recognition requires the 'vosk' package, which is not installed.")
+        return None
+
+    if not Path(VOSK_MODEL_DIR).exists():
+        print("Offline model not found. Would you like to set it up now? [y/N]")
+        choice = input().strip().lower()
+        if choice != 'y':
+            print("Skipping offline setup. Exiting.")
+            return None
+        download_and_setup_vosk()
+
+    try:
+        verify_model_files()
+    except Exception as e:
+        print(e)
+        return None
+
+    # The context manager closes the file on every exit path.
+    with wave.open(input_file, "rb") as wf:
+        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getframerate() not in [8000, 16000]:
+            print("Audio file must be WAV format mono PCM.")
+            return None
+
+        model = Model(VOSK_MODEL_DIR)
+        recognizer = KaldiRecognizer(model, wf.getframerate())
+
+        print("Processing audio for offline recognition...")
+        results = []
+        while True:
+            data = wf.readframes(4000)
+            if len(data) == 0:
+                break
+            if recognizer.AcceptWaveform(data):
+                results.append(recognizer.Result())
+
+        # Flush the recognizer: audio after the last detected utterance
+        # boundary is only returned by FinalResult().
+        results.append(recognizer.FinalResult())
+
+    # Each result is a JSON document; parse it rather than eval() it.
+    texts = [json.loads(result).get("text", "") for result in results]
+    return " ".join(text for text in texts if text)
+
+
+
+def wav_to_text(input_file, output_file):
+    """Transcribe a WAV file and write the text to output_file.
+
+    The audio is first re-encoded with convert_to_vosk_compatible_wav();
+    online recognition is tried first and offline recognition is the fallback.
+
+    Returns True when a non-empty transcription was written, False otherwise.
+    Raises ValueError when input_file does not end in ".wav".
+    """
+    if not input_file.lower().endswith('.wav'):
+        raise ValueError("Input file must be a WAV file.")
+
+    # Check and convert file format if necessary
+    converted_file = convert_to_vosk_compatible_wav(input_file)
+    if not converted_file:
+        print("File conversion failed. Unable to proceed.")
+        return False
+
+    try:
+        text = online_wav_to_text(converted_file)
+        if text is None:  # Fallback to offline if online fails
+            print("Online recognition failed. Switching to offline recognition...")
+            text = offline_wav_to_text(converted_file)
+    finally:
+        # The converted copy is only an intermediate; remove it so it does not
+        # pile up in the working directory. Never delete the caller's input.
+        same_file = os.path.abspath(converted_file) == os.path.abspath(input_file)
+        if not same_file and os.path.exists(converted_file):
+            os.remove(converted_file)
+
+    if text:
+        # Write UTF-8 explicitly so the result does not depend on the locale.
+        with open(output_file, 'w', encoding='utf-8') as f:
+            f.write(text)
+        print(f"Transcription completed successfully. Output saved to '{output_file}'.")
+        return True
+
+    print("Transcription failed. Please check the error message above.")
+    return False
+
+
+
+if __name__ == "__main__":
+    # Command-line entry point: wavtotext.py <input_file.wav> <output_file.txt>
+    #
+    # Every failure, including a rejected invocation, exits with status 1 so
+    # that a calling process can tell it apart from a successful run.
+    if len(sys.argv) != 3:
+        print("Usage: python wavtotext.py <input_file.wav> <output_file.txt>")
+        sys.exit(1)
+
+    input_file = sys.argv[1]
+    output_file = sys.argv[2]
+
+    # Validate the output extension first, then that the input exists.
+    if not output_file.lower().endswith('.txt'):
+        print("Output file must have a .txt extension.")
+        sys.exit(1)
+
+    if not os.path.exists(input_file):
+        print(f"Input file {input_file} does not exist.")
+        sys.exit(1)
+
+    # wav_to_text() returns False when no transcription could be written.
+    if not wav_to_text(input_file, output_file):
+        sys.exit(1)
+