Add audio conversion and WAV-to-text utilities; tighten path validation
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,3 +1,6 @@
|
||||
venv/
|
||||
sample_tests/
|
||||
backups/
|
||||
|
||||
# Ignore Vosk model directory
|
||||
model
|
||||
|
||||
@@ -43,7 +43,13 @@ SCRIPT_MAP = {
|
||||
"texttospeech": "texttospeech.py",
|
||||
"videotoaudio": "videotoaudio.py",
|
||||
"pdftoexcel": "pdftoexcel.py",
|
||||
"mergepdfs": "mergepdfs.py"
|
||||
"mergepdfs": "mergepdfs.py",
|
||||
"m4atomp3": "m4atomp3.py",
|
||||
"mp3tom4a": "mp3tom4a.py",
|
||||
"mp3towav": "mp3towav.py",
|
||||
"wavtomp3": "wavtomp3.py",
|
||||
"wavtotext":"wavtotext.py",
|
||||
"mdtodocx":"mdtodocx.py"
|
||||
}
|
||||
|
||||
# Display help for all commands
|
||||
@@ -74,6 +80,12 @@ Available commands:
|
||||
pdftoexcel <input_pdf_path> <output_excel_path>
|
||||
docxtomd <input_docx_path> <output_md_path>
|
||||
mergepdfs <output_pdf_path> <input_pdf1> <input_pdf2> ...
|
||||
m4atomp3 <input_m4a_path> <output_mp3_path>
|
||||
mp3tom4a <input_mp3_path> <output_m4a_path>
|
||||
mp3towav <input_mp3_path> <output_wav_path>
|
||||
wavtomp3 <input_wav_path> <output_mp3_path>
|
||||
wavtotext <input_wav_path> <output_text_path>
|
||||
mdtodocx <input_md_path> <output_docx_path>
|
||||
"""
|
||||
print(help_text)
|
||||
|
||||
@@ -93,19 +105,35 @@ def get_script_path(script_name):
|
||||
|
||||
def validate_paths(command, args):
    """Validate the path arguments for ``command`` and return the output path.

    Two argument layouts are recognised:

    * Multi-input commands (``jpgstopdf``, ``csvmerge``, ``mergepdfs``) take
      the output path first, followed by one or more input files.
    * Every other command uses the first argument as the input file and the
      second as the output path.

    Args:
        command: Name of the command being dispatched.
        args: Raw path arguments supplied for that command.

    Returns:
        The expanded output path.

    Raises:
        ValueError: If fewer than two arguments are given or the output
            location is not writable.
        FileNotFoundError: If any input file does not exist.
    """
    # Commands that require multiple input files and one output file
    if command in ["jpgstopdf", "csvmerge", "mergepdfs"]:
        if len(args) < 2:
            raise ValueError(f"Command '{command}' requires at least one output file and one or more input files.")

        output_path = expand_path(args[0])
        input_paths = [expand_path(path) for path in args[1:]]

        # An existing output file is accepted as-is; otherwise the directory
        # that will receive it must be writable ('' means the current dir).
        if not os.path.isfile(output_path) and not os.access(os.path.dirname(output_path) or '.', os.W_OK):
            raise ValueError(f"Output file '{output_path}' is not writable.")

        missing_files = [path for path in input_paths if not os.path.isfile(path)]
        if missing_files:
            raise FileNotFoundError(f"One or more input files do not exist: {', '.join(missing_files)}")

        return output_path

    # Commands that require exactly one input file and one output file
    if len(args) < 2:
        raise ValueError(f"Command '{command}' requires an input file and an output file.")

    input_path, output_path = expand_path(args[0]), expand_path(args[1])

    if not os.path.isfile(input_path):
        raise FileNotFoundError(f"Input file '{input_path}' does not exist.")

    if not os.access(os.path.dirname(output_path) or '.', os.W_OK):
        raise ValueError(f"Output directory for '{output_path}' is not writable.")

    return output_path
|
||||
|
||||
def run_command(command, args):
|
||||
|
||||
45
utils/m4atomp3.py
Normal file
45
utils/m4atomp3.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from pydub import AudioSegment
|
||||
|
||||
import sys
|
||||
|
||||
import os
|
||||
|
||||
|
||||
|
||||
def m4a_to_mp3(input_file, output_file):
    """Convert an M4A audio file to MP3.

    Args:
        input_file: Path to the source file; must end in ``.m4a``
            (case-insensitive).
        output_file: Path the MP3 data is written to.

    Raises:
        ValueError: If ``input_file`` does not have an ``.m4a`` extension.
    """
    if not input_file.lower().endswith('.m4a'):
        raise ValueError("Input file must be an M4A file.")

    audio = AudioSegment.from_file(input_file, format="m4a")
    # export() hands back the output file object still open; close it
    # explicitly instead of leaving it for the garbage collector.
    audio.export(output_file, format="mp3").close()
    print(f"Converted {input_file} to {output_file}.")
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: m4atomp3.py <input.m4a> <output.mp3>.
    # Every rejected invocation exits with status 1 so a calling process can
    # tell a failed conversion from a successful one.
    if len(sys.argv) != 3:
        print("Usage: python m4atomp3.py <input_file.m4a> <output_file.mp3>")
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    if not output_file.lower().endswith('.mp3'):
        print("Output file must have a .mp3 extension.")
        sys.exit(1)

    if not os.path.exists(input_file):
        print(f"Input file {input_file} does not exist.")
        sys.exit(1)

    m4a_to_mp3(input_file, output_file)
|
||||
45
utils/mp3tom4a.py
Normal file
45
utils/mp3tom4a.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from pydub import AudioSegment
|
||||
|
||||
import sys
|
||||
|
||||
import os
|
||||
|
||||
|
||||
|
||||
def mp3_to_m4a(input_file, output_file):
    """Convert an MP3 audio file to M4A.

    Args:
        input_file: Path to the source file; must end in ``.mp3``
            (case-insensitive).
        output_file: Path the converted audio is written to.

    Raises:
        ValueError: If ``input_file`` does not have an ``.mp3`` extension.
    """
    if not input_file.lower().endswith('.mp3'):
        raise ValueError("Input file must be an MP3 file.")

    audio = AudioSegment.from_file(input_file, format="mp3")
    # The export format is "mp4" (the container requested from the encoder),
    # not "m4a". export() returns the output file object still open, so
    # close it explicitly rather than leaking the handle.
    audio.export(output_file, format="mp4").close()
    print(f"Converted {input_file} to {output_file}.")
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: mp3tom4a.py <input.mp3> <output.m4a>.
    # Every rejected invocation exits with status 1 so a calling process can
    # tell a failed conversion from a successful one.
    if len(sys.argv) != 3:
        print("Usage: python mp3tom4a.py <input_file.mp3> <output_file.m4a>")
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    if not output_file.lower().endswith('.m4a'):
        print("Output file must have a .m4a extension.")
        sys.exit(1)

    if not os.path.exists(input_file):
        print(f"Input file {input_file} does not exist.")
        sys.exit(1)

    mp3_to_m4a(input_file, output_file)
|
||||
46
utils/mp3towav.py
Normal file
46
utils/mp3towav.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from pydub import AudioSegment
|
||||
|
||||
import sys
|
||||
|
||||
import os
|
||||
|
||||
|
||||
|
||||
def mp3_to_wav(input_file, output_file):
    """Convert an MP3 audio file to WAV.

    Args:
        input_file: Path to the source file; must end in ``.mp3``
            (case-insensitive).
        output_file: Path the WAV data is written to.

    Raises:
        ValueError: If ``input_file`` does not have an ``.mp3`` extension.
    """
    if not input_file.lower().endswith('.mp3'):
        raise ValueError("Input file must be an MP3 file.")

    audio = AudioSegment.from_file(input_file, format="mp3")
    # export() hands back the output file object still open; close it
    # explicitly instead of leaving it for the garbage collector.
    audio.export(output_file, format="wav").close()
    print(f"Converted {input_file} to {output_file}.")
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: mp3towav.py <input.mp3> <output.wav>.
    # Every rejected invocation exits with status 1 so a calling process can
    # tell a failed conversion from a successful one.
    if len(sys.argv) != 3:
        print("Usage: python mp3towav.py <input_file.mp3> <output_file.wav>")
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    if not output_file.lower().endswith('.wav'):
        print("Output file must have a .wav extension.")
        sys.exit(1)

    if not os.path.exists(input_file):
        print(f"Input file {input_file} does not exist.")
        sys.exit(1)

    mp3_to_wav(input_file, output_file)
|
||||
|
||||
46
utils/wavtomp3.py
Normal file
46
utils/wavtomp3.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from pydub import AudioSegment
|
||||
|
||||
import sys
|
||||
|
||||
import os
|
||||
|
||||
|
||||
|
||||
def wav_to_mp3(input_file, output_file):
    """Convert a WAV audio file to MP3.

    Args:
        input_file: Path to the source file; must end in ``.wav``
            (case-insensitive).
        output_file: Path the MP3 data is written to.

    Raises:
        ValueError: If ``input_file`` does not have a ``.wav`` extension.
    """
    if not input_file.lower().endswith('.wav'):
        raise ValueError("Input file must be a WAV file.")

    audio = AudioSegment.from_file(input_file, format="wav")
    # export() hands back the output file object still open; close it
    # explicitly instead of leaving it for the garbage collector.
    audio.export(output_file, format="mp3").close()
    print(f"Converted {input_file} to {output_file}.")
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: wavtomp3.py <input.wav> <output.mp3>.
    # Every rejected invocation exits with status 1 so a calling process can
    # tell a failed conversion from a successful one.
    if len(sys.argv) != 3:
        print("Usage: python wavtomp3.py <input_file.wav> <output_file.mp3>")
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    if not output_file.lower().endswith('.mp3'):
        print("Output file must have a .mp3 extension.")
        sys.exit(1)

    if not os.path.exists(input_file):
        print(f"Input file {input_file} does not exist.")
        sys.exit(1)

    wav_to_mp3(input_file, output_file)
|
||||
|
||||
311
utils/wavtotext.py
Normal file
311
utils/wavtotext.py
Normal file
@@ -0,0 +1,311 @@
|
||||
import speech_recognition as sr
|
||||
|
||||
import sys
|
||||
|
||||
import os
|
||||
|
||||
import time
|
||||
|
||||
import subprocess
|
||||
|
||||
import requests
|
||||
|
||||
import zipfile
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
|
||||
try:
|
||||
|
||||
from vosk import Model, KaldiRecognizer
|
||||
|
||||
except ImportError:
|
||||
|
||||
vosk_installed = False
|
||||
|
||||
else:
|
||||
|
||||
vosk_installed = True
|
||||
|
||||
|
||||
|
||||
VOSK_MODEL_URL = "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"
|
||||
|
||||
VOSK_MODEL_DIR = os.path.abspath("model")
|
||||
|
||||
|
||||
|
||||
def download_and_setup_vosk():
    """Download the Vosk model archive and unpack it into ``VOSK_MODEL_DIR``.

    The archive is streamed to a temporary ``vosk_model.zip`` in the current
    working directory, extracted, and removed again even if a step fails.
    If the archive wraps the model in a single top-level folder, that folder
    is flattened so the model files sit directly under ``VOSK_MODEL_DIR``,
    which is where ``verify_model_files`` looks for them. Finally the model
    directory name is appended to ``.gitignore`` unless already listed.

    Raises:
        requests.HTTPError: If the download request returns an error status.
        Exception: Propagated from ``verify_model_files`` when required model
            files are missing after extraction.
    """
    zip_path = "vosk_model.zip"
    print("Downloading Vosk model... This may take a few minutes.")

    try:
        with requests.get(VOSK_MODEL_URL, stream=True, timeout=60) as response:
            # Fail here rather than saving an HTML error page as the archive.
            response.raise_for_status()
            with open(zip_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)

        print("Extracting Vosk model...")
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            top_level = {
                name.split("/", 1)[0]
                for name in zip_ref.namelist()
                if name.strip("/")
            }
            zip_ref.extractall(VOSK_MODEL_DIR)
    finally:
        # Never leave a partial or already-extracted archive behind.
        if os.path.exists(zip_path):
            os.remove(zip_path)

    # Model archives are assumed to ship as one wrapping folder (to be
    # confirmed against the published archive); lift its contents up a level
    # so conf/ and am/ end up directly inside VOSK_MODEL_DIR.
    if len(top_level) == 1:
        nested = Path(VOSK_MODEL_DIR, top_level.pop())
        if nested.is_dir() and ((nested / "conf").exists() or (nested / "am").exists()):
            for entry in nested.iterdir():
                entry.rename(Path(VOSK_MODEL_DIR, entry.name))
            nested.rmdir()

    print(f"Vosk model downloaded and set up in '{VOSK_MODEL_DIR}'.")

    # Verify that the model directory contains necessary files
    verify_model_files()

    # Add model directory to .gitignore. Ignore patterns are relative to the
    # .gitignore file, so record the directory name, not the absolute path.
    ignore_entry = os.path.basename(VOSK_MODEL_DIR)
    try:
        with open(".gitignore", encoding="utf-8") as gitignore:
            already_listed = any(
                line.strip().strip("/") == ignore_entry for line in gitignore
            )
    except FileNotFoundError:
        already_listed = False

    if not already_listed:
        with open(".gitignore", "a", encoding="utf-8") as gitignore:
            gitignore.write(f"\n# Ignore Vosk model directory\n{ignore_entry}\n")
|
||||
|
||||
|
||||
|
||||
def verify_model_files():
    """Check that the required model files exist under ``VOSK_MODEL_DIR``.

    Raises:
        Exception: If ``conf/model.conf`` or ``am/final.mdl`` is absent; the
            message lists every missing relative path.
    """
    model_root = Path(VOSK_MODEL_DIR)
    absent = []
    for relative in ("conf/model.conf", "am/final.mdl"):
        if not Path(model_root, relative).exists():
            absent.append(relative)

    if not absent:
        return

    raise Exception(f"Model file(s) missing: {', '.join(absent)}. Re-download the Vosk model manually.")
|
||||
|
||||
def convert_to_vosk_compatible_wav(input_file):
    """Re-encode ``input_file`` as 16 kHz mono 16-bit PCM WAV using ffmpeg.

    The result is always written to ``converted.wav`` in the current working
    directory, replacing any file left there by an earlier run.

    Args:
        input_file: Path of the audio file to convert.

    Returns:
        The output file name on success, or ``None`` if ffmpeg cannot be
        started or exits with an error.
    """
    output_file = "converted.wav"
    command = [
        "ffmpeg",
        "-y",  # overwrite a stale output instead of blocking on ffmpeg's prompt
        "-i", input_file,
        "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
        output_file,
    ]

    print(f"Converting {input_file} to Vosk-compatible format...")
    try:
        subprocess.run(command, check=True)
    except FileNotFoundError:
        # Raised when the ffmpeg executable itself is not on PATH.
        print("Error converting file: ffmpeg executable not found. Install ffmpeg and make sure it is on PATH.")
        return None
    except subprocess.CalledProcessError as e:
        print(f"Error converting file: {e}")
        return None

    print(f"Converted file saved as {output_file}.")
    return output_file
|
||||
|
||||
|
||||
|
||||
def online_wav_to_text(input_file):
    """Transcribe an audio file with the online Google recognizer.

    The request is attempted up to three times, backing off exponentially
    between attempts when the service reports a request error.

    Args:
        input_file: Path to an audio file readable by
            ``speech_recognition.AudioFile``.

    Returns:
        The recognised text, or ``None`` if the audio could not be understood
        or every attempt failed with a request error.
    """
    max_attempts = 3

    recognizer = sr.Recognizer()
    with sr.AudioFile(input_file) as source:
        print("Processing audio for online recognition...")
        audio_data = recognizer.record(source)

    for attempt in range(max_attempts):
        try:
            return recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            # Retrying cannot help when the audio itself is unintelligible.
            print("Speech recognition could not understand the audio.")
            return None
        except sr.RequestError as e:
            print(f"API error on attempt {attempt + 1}: {e}")
            # Back off only when another attempt follows; sleeping after the
            # final failure would just delay the caller's offline fallback.
            if attempt < max_attempts - 1:
                time.sleep(2 ** attempt)  # Exponential backoff

    return None
|
||||
|
||||
|
||||
|
||||
def offline_wav_to_text(input_file):
    """Transcribe a WAV file locally with the Vosk model in ``VOSK_MODEL_DIR``.

    If the model directory is missing, the user is asked on stdin whether to
    download it. The audio must be mono, 16-bit, sampled at 8000 or 16000 Hz.

    Args:
        input_file: Path to the WAV file to transcribe.

    Returns:
        The recognised text (possibly empty), or ``None`` when vosk is not
        installed, the model is unavailable or incomplete, or the audio
        format is unsupported.
    """
    import json  # recognizer results are JSON strings
    import wave  # Ensure wave is imported before using offline recognition

    if not vosk_installed:
        # Model / KaldiRecognizer were never imported; fail cleanly instead
        # of hitting a NameError further down.
        print("Offline recognition requires the 'vosk' package, which is not installed.")
        return None

    if not Path(VOSK_MODEL_DIR).exists():
        print("Offline model not found. Would you like to set it up now? [y/N]")
        if input().strip().lower() != 'y':
            print("Skipping offline setup. Exiting.")
            return None
        download_and_setup_vosk()

    try:
        verify_model_files()
    except Exception as e:
        print(e)
        return None

    results = []
    # The context manager closes the file on every path, including errors
    # raised while loading the model or decoding.
    with wave.open(input_file, "rb") as wf:
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getframerate() not in [8000, 16000]:
            print("Audio file must be WAV format mono PCM.")
            return None

        model = Model(VOSK_MODEL_DIR)
        recognizer = KaldiRecognizer(model, wf.getframerate())

        print("Processing audio for offline recognition...")
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            if recognizer.AcceptWaveform(data):
                results.append(recognizer.Result())

        # Audio after the last detected utterance boundary is only returned
        # by FinalResult(); without it the end of the recording is dropped.
        results.append(recognizer.FinalResult())

    # Combine results into a single text. Parse with json.loads rather than
    # eval: the payload is JSON, and eval would execute arbitrary expressions.
    texts = (json.loads(result).get("text", "") for result in results)
    return " ".join(text for text in texts if text)
|
||||
|
||||
|
||||
|
||||
def wav_to_text(input_file, output_file):
    """Transcribe ``input_file`` and write the text to ``output_file``.

    The audio is first re-encoded to a recognizer-friendly WAV, then sent to
    online recognition; if that yields nothing, offline recognition is tried.
    The intermediate converted file is removed before returning.

    Args:
        input_file: Path to the source audio; must end in ``.wav``
            (case-insensitive).
        output_file: Path the transcript is written to.

    Returns:
        ``True`` if a non-empty transcript was written, ``False`` otherwise.

    Raises:
        ValueError: If ``input_file`` does not have a ``.wav`` extension.
    """
    if not input_file.lower().endswith('.wav'):
        raise ValueError("Input file must be a WAV file.")

    # Check and convert file format if necessary
    converted_file = convert_to_vosk_compatible_wav(input_file)
    if not converted_file:
        print("File conversion failed. Unable to proceed.")
        return False

    try:
        text = online_wav_to_text(converted_file)
        if text is None:  # Fallback to offline if online fails
            print("Online recognition failed. Switching to offline recognition...")
            text = offline_wav_to_text(converted_file)
    finally:
        # The converted copy is only an intermediate artifact. Removal is
        # best-effort, and the caller's own input is never deleted.
        if os.path.abspath(converted_file) != os.path.abspath(input_file):
            try:
                os.remove(converted_file)
            except OSError:
                pass

    if text:
        # Explicit UTF-8 so the transcript does not depend on the platform's
        # default encoding.
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(text)
        print(f"Transcription completed successfully. Output saved to '{output_file}'.")
        return True

    print("Transcription failed. Please check the error message above.")
    return False
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: wavtotext.py <input.wav> <output.txt>.
    # Every failure path exits with status 1, matching the status already
    # used when the transcription itself fails.
    if len(sys.argv) != 3:
        print("Usage: python wavtotext.py <input_file.wav> <output_file.txt>")
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    if not output_file.lower().endswith('.txt'):
        print("Output file must have a .txt extension.")
        sys.exit(1)

    if not os.path.exists(input_file):
        print(f"Input file {input_file} does not exist.")
        sys.exit(1)

    success = wav_to_text(input_file, output_file)
    if not success:
        sys.exit(1)  # Exit with a non-zero status code on failure
|
||||
|
||||
Reference in New Issue
Block a user