fixed up file path, fixed up some gitignores, add version support, working on a better install and binary compilation

This commit is contained in:
klein panic
2024-11-14 22:22:18 -05:00
parent b6680be0ab
commit c1372606a6
52 changed files with 318 additions and 8267 deletions

86
utils/audiototext.py Normal file
View File

@@ -0,0 +1,86 @@
import speech_recognition as sr
from pydub import AudioSegment
import os
import sys
def convert_audio_to_text(input_audio_path, output_text_path, chunk_length_ms=30000):
    """Transcribe an audio file to text using Google's speech recognition.

    MP3 input is first converted to a temporary WAV file. The audio is then
    cut into pieces of ``chunk_length_ms`` milliseconds, each piece is
    transcribed on its own, and the labelled transcripts are written to
    ``output_text_path``.

    Every failure is reported on stdout and ends the process with
    ``sys.exit(1)``; chunks whose speech cannot be recognised are reported
    and skipped.

    Args:
        input_audio_path: Path of the audio file to transcribe (WAV or MP3).
        output_text_path: Path of the UTF-8 text file to create.
        chunk_length_ms: Length of each transcription chunk in milliseconds.
    """
    # Imported locally because only this function needs temporary files.
    import tempfile

    if not os.path.isfile(input_audio_path):
        print(f"Error: The file '{input_audio_path}' does not exist.")
        sys.exit(1)

    output_dir = os.path.dirname(output_text_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    # Every intermediate file lives in a private temporary directory, so a
    # pre-existing '<name>.wav' next to the input is never overwritten or
    # deleted, an upper-case '.MP3' input is never clobbered by its own
    # conversion, and nothing is left behind in the working directory.
    with tempfile.TemporaryDirectory() as work_dir:
        wav_audio_path = input_audio_path
        if input_audio_path.lower().endswith('.mp3'):
            try:
                sound = AudioSegment.from_mp3(input_audio_path)
                wav_audio_path = os.path.join(work_dir, "converted.wav")
                sound.export(wav_audio_path, format="wav")
                print(f"Converted '{input_audio_path}' to WAV format.")
            except Exception as e:
                print(f"Error converting MP3 to WAV: {e}")
                sys.exit(1)

        recognizer = sr.Recognizer()
        try:
            audio = AudioSegment.from_wav(wav_audio_path)
            total_ms = len(audio)
            chunk_path = os.path.join(work_dir, "chunk.wav")
            transcripts = []

            # Stepping by chunk length avoids the empty trailing chunk that
            # "len // size + 1" produces when the audio length is an exact
            # multiple of the chunk length.
            for i, start_time in enumerate(range(0, total_ms, chunk_length_ms)):
                end_time = min(start_time + chunk_length_ms, total_ms)
                audio[start_time:end_time].export(chunk_path, format="wav")

                with sr.AudioFile(chunk_path) as source:
                    audio_data = recognizer.record(source)

                try:
                    chunk_text = recognizer.recognize_google(audio_data)
                    transcripts.append(f"Chunk {i + 1}:\n{chunk_text}\n\n")
                except sr.UnknownValueError:
                    # Unintelligible chunk: report it and keep going.
                    print(f"Chunk {i + 1}: Unable to recognize speech.")
                except sr.RequestError as e:
                    # The recognition request itself failed; abort the run.
                    print(f"Error with chunk {i + 1}: {e}")
                    sys.exit(1)

            with open(output_text_path, 'w', encoding='utf-8') as file:
                file.write("".join(transcripts))
            print(f"Transcription complete. Check the '{output_text_path}' file.")
        except Exception as e:
            print(f"An unexpected error occurred during transcription: {e}")
            sys.exit(1)
if __name__ == "__main__":
    # Script entry point: exactly two arguments are required, the input
    # audio path followed by the output text path.
    if len(sys.argv) != 3:
        print("Usage: python audiototext.py <input_audio_path> <output_text_path>")
        sys.exit(1)

    input_audio_path, output_text_path = sys.argv[1:]
    convert_audio_to_text(input_audio_path, output_text_path)

36
utils/csvmerge.py Normal file
View File

@@ -0,0 +1,36 @@
import pandas as pd
import sys
import os
def merge_csv_files(output_csv_path, common_key, *input_csv_paths):
    """Outer-join several CSV files on one shared column and save the result.

    The first file is the starting table; each following file is merged
    into it in argument order. Fewer than two inputs, or any error while
    reading, merging or writing, prints a message and exits with status 1.

    Args:
        output_csv_path: Path of the merged CSV file to write.
        common_key: Name of the column used as the join key.
        *input_csv_paths: Paths of the CSV files to merge (at least two).
    """
    if len(input_csv_paths) < 2:
        print("Error: Please provide at least two CSV files to merge.")
        sys.exit(1)

    try:
        remaining = iter(input_csv_paths)
        combined = pd.read_csv(next(remaining))

        # Files are read lazily, one per merge step, in the order given.
        for path in remaining:
            combined = combined.merge(pd.read_csv(path), on=common_key, how='outer')

        combined.to_csv(output_csv_path, index=False)
        print(f"Merge complete. Check the '{output_csv_path}' file.")
    except Exception as err:
        print(f"Error during CSV merge: {err}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: output path, key column, then the input CSV paths.
    if len(sys.argv) < 4:
        print("Usage: python csvmerge.py <output_csv_path> <common_key> <input_csv1> <input_csv2> ...")
        sys.exit(1)

    output_csv_path, common_key, *input_csv_paths = sys.argv[1:]
    merge_csv_files(output_csv_path, common_key, *input_csv_paths)

43
utils/csvtoexcel.py Normal file
View File

@@ -0,0 +1,43 @@
import pandas as pd
import sys
import os
def main():
    """Convert the CSV file named on the command line into an Excel workbook.

    Reads ``sys.argv`` for ``<input_csv_path> <output_excel_path.xlsx>``.
    Wrong argument count, a missing input file, an unwritable output
    directory or any conversion error prints a message and exits with
    status 1.
    """
    if len(sys.argv) != 3:
        print("Usage: python csvtoexcel.py <input_csv_path> <output_excel_path.xlsx>")
        sys.exit(1)

    input_csv_path, output_excel_path = sys.argv[1:]

    if not os.path.isfile(input_csv_path):
        print(f"Error: The input CSV file '{input_csv_path}' does not exist.")
        sys.exit(1)

    # An empty dirname means "current directory"; no access check is made then.
    target_dir = os.path.dirname(output_excel_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        pd.read_csv(input_csv_path).to_excel(output_excel_path, index=False)
        print(f"Conversion complete. Check the '{output_excel_path}' file.")
    except pd.errors.EmptyDataError:
        print(f"Error: The input CSV file '{input_csv_path}' is empty.")
        sys.exit(1)
    except pd.errors.ParserError as err:
        print(f"Error: Failed to parse CSV file '{input_csv_path}': {err}")
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_excel_path}'.")
        sys.exit(1)
    except Exception as err:
        print(f"Error during conversion: {err}")
        sys.exit(1)


if __name__ == "__main__":
    main()

54
utils/csvtojson.py Normal file
View File

@@ -0,0 +1,54 @@
import csv
import json
import sys
import os
def main():
    """Convert the CSV file named on the command line into a JSON array.

    Reads ``sys.argv`` for ``<input_csv_path> <output_json_path>``. Each CSV
    row becomes one JSON object keyed by the header row. Any validation,
    read or write failure prints a message and exits with status 1.
    """
    if len(sys.argv) != 3:
        print("Usage: python csvtojson.py <input_csv_path> <output_json_path>")
        sys.exit(1)

    input_csv_path, output_json_path = sys.argv[1:]

    if not os.path.isfile(input_csv_path):
        print(f"Error: The input CSV file '{input_csv_path}' does not exist.")
        sys.exit(1)

    target_dir = os.path.dirname(output_json_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    # Step 1: load every row as a dict.
    try:
        with open(input_csv_path, 'r', newline='', encoding='utf-8') as source:
            records = [row for row in csv.DictReader(source)]
    except FileNotFoundError:
        print(f"Error: The input CSV file '{input_csv_path}' was not found.")
        sys.exit(1)
    except csv.Error as err:
        print(f"Error reading CSV file: {err}")
        sys.exit(1)
    except Exception as err:
        print(f"An unexpected error occurred while reading the CSV file: {err}")
        sys.exit(1)

    # Step 2: serialise the rows, keeping non-ASCII characters readable.
    try:
        with open(output_json_path, 'w', encoding='utf-8') as target:
            json.dump(records, target, indent=4, ensure_ascii=False)
        print(f"Conversion complete. Check the '{output_json_path}' file.")
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_json_path}'.")
        sys.exit(1)
    except Exception as err:
        print(f"An unexpected error occurred while writing the JSON file: {err}")
        sys.exit(1)


if __name__ == "__main__":
    main()

43
utils/csvtoyaml.py Normal file
View File

@@ -0,0 +1,43 @@
#!/usr/bin/env python3
import pandas as pd
import yaml
import sys
import os
def convert_csv_to_yaml(input_csv_path, output_yaml_path):
    """Write the rows of a CSV file as a YAML list of mappings.

    A missing input file, an unwritable output directory or any conversion
    error prints a message and exits with status 1.

    Args:
        input_csv_path: Path of the CSV file to read.
        output_yaml_path: Path of the YAML file to write (UTF-8).
    """
    if not os.path.isfile(input_csv_path):
        print(f"Error: The file '{input_csv_path}' does not exist.")
        sys.exit(1)

    target_dir = os.path.dirname(output_yaml_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        # One dict per CSV row, in file order.
        records = pd.read_csv(input_csv_path).to_dict(orient='records')
        with open(output_yaml_path, 'w', encoding='utf-8') as target:
            yaml.dump(records, target, default_flow_style=False, sort_keys=False, allow_unicode=True)
        print(f"Conversion complete. Check the '{output_yaml_path}' file.")
    except Exception as err:
        print(f"Error during CSV to YAML conversion: {err}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: input CSV path followed by output YAML path.
    if len(sys.argv) != 3:
        print("Usage: python csvtoyaml.py <input_csv_path> <output_yaml_path>")
        sys.exit(1)

    input_csv_path, output_yaml_path = sys.argv[1:]
    convert_csv_to_yaml(input_csv_path, output_yaml_path)

44
utils/docxtomd.py Normal file
View File

@@ -0,0 +1,44 @@
#!/usr/bin/env python3
import docx
import sys
import os
def convert_docx_to_md(input_docx_path, output_md_path):
    """Dump the paragraph text of a DOCX file into a Markdown file.

    Each paragraph's text is written followed by a blank line; no other
    formatting is carried over. A missing input file, an unwritable output
    directory or any conversion error prints a message and exits with
    status 1.

    Args:
        input_docx_path: Path of the DOCX file to read.
        output_md_path: Path of the Markdown file to write (UTF-8).
    """
    if not os.path.isfile(input_docx_path):
        print(f"Error: The file '{input_docx_path}' does not exist.")
        sys.exit(1)

    target_dir = os.path.dirname(output_md_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        document = docx.Document(input_docx_path)
        md_content = "".join(paragraph.text + "\n\n" for paragraph in document.paragraphs)

        with open(output_md_path, 'w', encoding='utf-8') as target:
            target.write(md_content)
        print(f"Conversion complete. Check the '{output_md_path}' file.")
    except Exception as err:
        print(f"Error during DOCX to Markdown conversion: {err}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: input DOCX path followed by output Markdown path.
    if len(sys.argv) != 3:
        print("Usage: python docxtomd.py <input_docx_path> <output_md_path>")
        sys.exit(1)

    input_docx_path, output_md_path = sys.argv[1:]
    convert_docx_to_md(input_docx_path, output_md_path)

47
utils/exceltocsv.py Normal file
View File

@@ -0,0 +1,47 @@
import pandas as pd
import sys
import os
def main():
    """Convert the Excel workbook named on the command line into a CSV file.

    Reads ``sys.argv`` for ``<input_excel_path> <output_csv_path>``. The
    workbook is read with the ``openpyxl`` engine. Wrong argument count, a
    missing input file, an unwritable output directory or any conversion
    error prints a message and exits with status 1.
    """
    # Imported locally: only needed to recognise a corrupt .xlsx container.
    import zipfile

    if len(sys.argv) != 3:
        print("Usage: python exceltocsv.py <input_excel_path> <output_csv_path>")
        sys.exit(1)

    input_excel_path = sys.argv[1]
    output_csv_path = sys.argv[2]

    if not os.path.isfile(input_excel_path):
        print(f"Error: The input Excel file '{input_excel_path}' does not exist.")
        sys.exit(1)

    output_dir = os.path.dirname(output_csv_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        df = pd.read_excel(input_excel_path, engine='openpyxl')
        df.to_csv(output_csv_path, index=False)
        print(f"Conversion complete. Check the '{output_csv_path}' file.")
    except FileNotFoundError:
        print(f"Error: The input Excel file '{input_excel_path}' was not found.")
        sys.exit(1)
    except pd.errors.EmptyDataError:
        print(f"Error: The input Excel file '{input_excel_path}' is empty.")
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_csv_path}'.")
        sys.exit(1)
    except (ValueError, zipfile.BadZipFile) as e:
        # pandas.errors has no ExcelFileError; naming it in an except clause
        # raised AttributeError as soon as any other exception reached it.
        # Unreadable workbooks are reported through these real types instead.
        print(f"Error reading the Excel file '{input_excel_path}': {e}")
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred during conversion: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()

42
utils/exceltojson.py Normal file
View File

@@ -0,0 +1,42 @@
#!/usr/bin/env python3
import pandas as pd
import sys
import os
def convert_excel_to_json(input_excel_path, output_json_path):
    """Write the rows of an Excel workbook as a JSON array of objects.

    The workbook is read with the ``openpyxl`` engine and serialised with
    one object per row, indented by four spaces. A missing input file, an
    unwritable output directory or any conversion error prints a message
    and exits with status 1.

    Args:
        input_excel_path: Path of the Excel file to read.
        output_json_path: Path of the JSON file to write (UTF-8).
    """
    if not os.path.isfile(input_excel_path):
        print(f"Error: The file '{input_excel_path}' does not exist.")
        sys.exit(1)

    target_dir = os.path.dirname(output_json_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        frame = pd.read_excel(input_excel_path, engine='openpyxl')
        serialized = frame.to_json(orient='records', indent=4)

        with open(output_json_path, 'w', encoding='utf-8') as target:
            target.write(serialized)
        print(f"Conversion complete. Check the '{output_json_path}' file.")
    except Exception as err:
        print(f"Error during Excel to JSON conversion: {err}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: input Excel path followed by output JSON path.
    if len(sys.argv) != 3:
        # The usage text now names the real script (it read "exceltotojson.py").
        print("Usage: python exceltojson.py <input_excel_path> <output_json_path>")
        sys.exit(1)

    input_excel_path = sys.argv[1]
    output_json_path = sys.argv[2]
    convert_excel_to_json(input_excel_path, output_json_path)

54
utils/htmltomd.py Normal file
View File

@@ -0,0 +1,54 @@
import html2text
import sys
import os
def main():
    """Convert the HTML file named on the command line into Markdown.

    Reads ``sys.argv`` for ``<input_html_path> <output_md_path>``. Wrong
    argument count, a missing input file, an unwritable output directory,
    or a failure while reading, converting or writing prints a message and
    exits with status 1.
    """
    if len(sys.argv) != 3:
        # The usage text names the actual script (it used to say "html_to_md.py").
        print("Usage: python htmltomd.py <input_html_path> <output_md_path>")
        sys.exit(1)

    input_html_path = sys.argv[1]
    output_md_path = sys.argv[2]

    if not os.path.isfile(input_html_path):
        print(f"Error: The input HTML file '{input_html_path}' does not exist.")
        sys.exit(1)

    output_dir = os.path.dirname(output_md_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        with open(input_html_path, 'r', encoding='utf-8') as html_file:
            html_content = html_file.read()
    except FileNotFoundError:
        print(f"Error: The input HTML file '{input_html_path}' was not found.")
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied when reading '{input_html_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred while reading the HTML file: {e}")
        sys.exit(1)

    try:
        md_content = html2text.html2text(html_content)
        with open(output_md_path, 'w', encoding='utf-8') as md_file:
            md_file.write(md_content)
        print(f"Conversion complete. Check the '{output_md_path}' file.")
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_md_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred while writing the Markdown file: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()

37
utils/htmltopdf.py Normal file
View File

@@ -0,0 +1,37 @@
#!/usr/bin/env python3
import pdfkit
import sys
import os
def convert_html_to_pdf(input_html_path, output_pdf_path):
    """Render an HTML file to PDF through pdfkit / wkhtmltopdf.

    The wkhtmltopdf binary at ``/usr/bin/wkhtmltopdf`` is used when it
    exists; otherwise the executable is looked up on ``PATH``. A missing
    input file, an unwritable output directory or any conversion error
    prints a message and exits with status 1.

    Args:
        input_html_path: Path of the HTML file to render.
        output_pdf_path: Path of the PDF file to create.
    """
    # Imported locally: only needed to locate the wkhtmltopdf executable.
    import shutil

    if not os.path.isfile(input_html_path):
        print(f"Error: The file '{input_html_path}' does not exist.")
        sys.exit(1)

    output_dir = os.path.dirname(output_pdf_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        # Keep the historical location as first choice, but fall back to a
        # PATH lookup so installs outside /usr/bin also work.
        default_binary = '/usr/bin/wkhtmltopdf'
        if os.path.isfile(default_binary):
            wkhtmltopdf_path = default_binary
        else:
            wkhtmltopdf_path = shutil.which('wkhtmltopdf') or default_binary

        config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf_path)
        pdfkit.from_file(input_html_path, output_pdf_path, configuration=config)
        print(f"Conversion complete. Check the '{output_pdf_path}' file.")
    except Exception as e:
        print(f"Error during HTML to PDF conversion: {e}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: input HTML path followed by output PDF path.
    if len(sys.argv) != 3:
        print("Usage: python htmltopdf.py <input_html_path> <output_pdf_path>")
        sys.exit(1)

    input_html_path, output_pdf_path = sys.argv[1:]
    convert_html_to_pdf(input_html_path, output_pdf_path)

60
utils/imagetomd.py Executable file
View File

@@ -0,0 +1,60 @@
#!/usr/bin/env python3
from PIL import Image, ImageEnhance
import pytesseract
import sys
import os
def image_to_markdown(image_path, output_md_path):
    """OCR an image with Tesseract and save the text as a Markdown file.

    The image is converted to grayscale and its contrast doubled before
    recognition (English language data). The output starts with an
    ``# Image Content`` heading followed by the recognised text. A missing
    input file, an unwritable output directory or any processing error
    prints a message and exits with status 1.

    Args:
        image_path: Path of the image to read.
        output_md_path: Path of the Markdown file to write (UTF-8).
    """
    if not os.path.isfile(image_path):
        print(f"Error: The input image file '{image_path}' does not exist.")
        sys.exit(1)

    target_dir = os.path.dirname(output_md_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        # Pre-processing: grayscale first, then a 2x contrast boost.
        grayscale = Image.open(image_path).convert('L')
        boosted = ImageEnhance.Contrast(grayscale).enhance(2.0)

        recognized = pytesseract.image_to_string(boosted, lang='eng')
        md_content = "# Image Content\n\n" + recognized

        with open(output_md_path, 'w', encoding='utf-8') as target:
            target.write(md_content)
        print(f"Markdown file created at '{output_md_path}'")
    except FileNotFoundError:
        print(f"Error: The input image file '{image_path}' was not found.")
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_md_path}'.")
        sys.exit(1)
    except pytesseract.pytesseract.TesseractError as err:
        print(f"Error processing image with Tesseract: {err}")
        sys.exit(1)
    except Exception as err:
        print(f"An unexpected error occurred: {err}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: input image path followed by output Markdown path.
    if len(sys.argv) != 3:
        print("Usage: imagetomd <input_image_path> <output_md_path>")
        sys.exit(1)

    input_path, output_path = sys.argv[1:]
    image_to_markdown(input_path, output_path)

55
utils/jpgstopdf.py Normal file
View File

@@ -0,0 +1,55 @@
#!/usr/bin/env python3
from PIL import Image
import sys
import os
def convert_images_to_pdf(image_paths, output_pdf_path):
    """Combine image files into one PDF, one image per page.

    Paths that do not exist or cannot be opened are reported and skipped;
    RGBA images are converted to RGB before saving. If no image could be
    loaded, the output directory is not writable, or saving fails, a
    message is printed and the process exits with status 1.

    Args:
        image_paths: Iterable of image file paths, in page order.
        output_pdf_path: Path of the PDF file to create.
    """
    images = []

    for image_path in image_paths:
        if os.path.isfile(image_path):
            try:
                picture = Image.open(image_path)
                images.append(picture.convert('RGB') if picture.mode == 'RGBA' else picture)
            except Exception as err:
                print(f"Error processing image '{image_path}': {err}")
        else:
            print(f"Error: The input image file '{image_path}' does not exist.")

    if not images:
        print("No valid images to convert. Please check your input files.")
        sys.exit(1)

    target_dir = os.path.dirname(output_pdf_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        # The first image carries the save call; the rest are appended pages.
        first_page, *other_pages = images
        first_page.save(output_pdf_path, save_all=True, append_images=other_pages)
        print(f"PDF created at '{output_pdf_path}'")
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_pdf_path}'.")
        sys.exit(1)
    except Exception as err:
        print(f"An unexpected error occurred while saving the PDF: {err}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: output PDF path first, then one or more image paths.
    if len(sys.argv) < 3:
        print("Usage: jpgstopdf <output_pdf_path> <input_jpg_path1> <input_jpg_path2> ...")
        sys.exit(1)

    output_pdf_path, *image_paths = sys.argv[1:]
    convert_images_to_pdf(image_paths, output_pdf_path)

42
utils/jpgtopng.py Normal file
View File

@@ -0,0 +1,42 @@
#!/usr/bin/env python3
from PIL import Image
import sys
import os
def convert_jpg_to_png(input_path, output_path):
    """Re-encode a JPG image as PNG.

    The image is converted to RGB mode before being saved. A missing input
    file, an unwritable output directory or any conversion error prints a
    message and exits with status 1.

    Args:
        input_path: Path of the JPG file to read.
        output_path: Path of the PNG file to write.
    """
    if not os.path.isfile(input_path):
        print(f"Error: The input JPG file '{input_path}' does not exist.")
        sys.exit(1)

    target_dir = os.path.dirname(output_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        rgb_image = Image.open(input_path).convert("RGB")
        rgb_image.save(output_path, "PNG")
        print(f"Conversion complete. Check the '{output_path}' file.")
    except FileNotFoundError:
        print(f"Error: The input JPG file '{input_path}' was not found.")
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_path}'.")
        sys.exit(1)
    except Exception as err:
        print(f"An unexpected error occurred during conversion: {err}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: input JPG path followed by output PNG path.
    if len(sys.argv) != 3:
        print("Usage: jpgtopng <input_jpg_path> <output_png_path>")
        sys.exit(1)

    input_path, output_path = sys.argv[1:]
    convert_jpg_to_png(input_path, output_path)

62
utils/jsontocsv.py Normal file
View File

@@ -0,0 +1,62 @@
import json
import csv
import sys
import os
def convert_json_to_csv(input_json_path, output_csv_path):
    """Convert a JSON array of objects into a CSV file.

    The header row is the union of the keys of all objects, in order of
    first appearance, so a key that only shows up in later objects still
    gets a column. Objects lacking a key get an empty cell.

    A missing input file, an unwritable output directory, invalid JSON,
    input that is not a non-empty list of objects, or a write failure
    prints a message and exits with status 1.

    Args:
        input_json_path: Path of the JSON file to read (UTF-8).
        output_csv_path: Path of the CSV file to write (UTF-8).
    """
    if not os.path.isfile(input_json_path):
        print(f"Error: The input JSON file '{input_json_path}' does not exist.")
        sys.exit(1)

    output_dir = os.path.dirname(output_csv_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        with open(input_json_path, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)
    except json.JSONDecodeError:
        print(f"Error: Failed to decode JSON file '{input_json_path}'. Please check if the file is valid JSON.")
        sys.exit(1)
    except Exception as e:
        print(f"Error reading JSON file: {e}")
        sys.exit(1)

    # Validate before opening the output so that bad input never truncates
    # an existing CSV file.
    is_object_list = (
        isinstance(data, list)
        and bool(data)
        and all(isinstance(row, dict) for row in data)
    )
    if not is_object_list:
        print("Error: The JSON file must contain a list of objects.")
        sys.exit(1)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    headers = list(dict.fromkeys(key for row in data for key in row))

    try:
        with open(output_csv_path, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=headers, restval="")
            writer.writeheader()
            writer.writerows(data)
        print(f"Conversion complete. Check the '{output_csv_path}' file.")
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_csv_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"Error writing CSV file: {e}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: input JSON path followed by output CSV path.
    if len(sys.argv) != 3:
        print("Usage: python jsontocsv.py <input_json_path> <output_csv_path>")
        sys.exit(1)

    input_json_path, output_csv_path = sys.argv[1:]
    convert_json_to_csv(input_json_path, output_csv_path)

41
utils/jsontoexcel.py Normal file
View File

@@ -0,0 +1,41 @@
#!/usr/bin/env python3
import pandas as pd
import sys
import os
def convert_json_to_excel(input_json_path, output_excel_path):
    """Load a JSON file with pandas and save it as an Excel workbook.

    The workbook is written with the ``openpyxl`` engine and without the
    index column. A missing input file, an unwritable output directory or
    any conversion error prints a message and exits with status 1.

    Args:
        input_json_path: Path of the JSON file to read.
        output_excel_path: Path of the Excel file to write.
    """
    if not os.path.isfile(input_json_path):
        print(f"Error: The file '{input_json_path}' does not exist.")
        sys.exit(1)

    target_dir = os.path.dirname(output_excel_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        frame = pd.read_json(input_json_path)
        frame.to_excel(output_excel_path, index=False, engine='openpyxl')
        print(f"Conversion complete. Check the '{output_excel_path}' file.")
    except ValueError:
        print(f"Error: Invalid JSON format in the file '{input_json_path}'.")
        sys.exit(1)
    except Exception as err:
        print(f"Error during JSON to Excel conversion: {err}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: input JSON path followed by output Excel path.
    if len(sys.argv) != 3:
        print("Usage: python jsontoexcel.py <input_json_path> <output_excel_path>")
        sys.exit(1)

    input_json_path, output_excel_path = sys.argv[1:]
    convert_json_to_excel(input_json_path, output_excel_path)

40
utils/jsontoyaml.py Normal file
View File

@@ -0,0 +1,40 @@
import json
import yaml
import sys
import os
def convert_json_to_yaml(input_json_path, output_yaml_path):
    """Convert a JSON document into block-style YAML.

    Key order is preserved and non-ASCII text is written as-is rather than
    escaped, matching the other YAML converters in this package. A missing
    input file, an unwritable output directory or any conversion error
    prints a message and exits with status 1.

    Args:
        input_json_path: Path of the JSON file to read (UTF-8).
        output_yaml_path: Path of the YAML file to write (UTF-8).
    """
    if not os.path.isfile(input_json_path):
        print(f"Error: The file '{input_json_path}' does not exist.")
        sys.exit(1)

    output_dir = os.path.dirname(output_yaml_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        with open(input_json_path, 'r', encoding='utf-8') as json_file:
            json_content = json.load(json_file)

        with open(output_yaml_path, 'w', encoding='utf-8') as yaml_file:
            # allow_unicode keeps accented and non-Latin text readable; the
            # output file is already opened as UTF-8.
            yaml.dump(
                json_content,
                yaml_file,
                default_flow_style=False,
                sort_keys=False,
                allow_unicode=True,
            )
        print(f"Conversion complete. Check the '{output_yaml_path}' file.")
    except Exception as e:
        print(f"Error during JSON to YAML conversion: {e}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: input JSON path followed by output YAML path.
    if len(sys.argv) != 3:
        print("Usage: python jsontoyaml.py <input_json_path> <output_yaml_path>")
        sys.exit(1)

    input_json_path, output_yaml_path = sys.argv[1:]
    convert_json_to_yaml(input_json_path, output_yaml_path)

61
utils/mdtabletocsv.py Normal file
View File

@@ -0,0 +1,61 @@
import pandas as pd
import re
import sys
import os
def extract_table_from_markdown(md_content):
    """Return the first pipe-delimited Markdown table as a DataFrame.

    A table is the first run of consecutive lines that, ignoring
    surrounding whitespace, start and end with ``|``. The first line
    supplies the column names. The second line is skipped only when it is
    a separator row such as ``|---|:--:|``; otherwise it is kept as data.
    All cell values are whitespace-stripped strings.

    Args:
        md_content: Markdown text to search.

    Returns:
        A ``pandas.DataFrame`` holding the table, or ``None`` when the text
        contains no table.
    """
    def is_table_line(text):
        # At least "|x|": two pipes with something between them.
        return len(text) >= 3 and text.startswith('|') and text.endswith('|')

    def split_cells(text):
        # Drop the empty fragments outside the first and last pipe.
        return [cell.strip() for cell in text.split('|')[1:-1]]

    # Lines are stripped before testing so that indentation or trailing
    # spaces after the closing pipe no longer cut the table short.
    rows = []
    for raw_line in md_content.splitlines():
        line = raw_line.strip()
        if is_table_line(line):
            rows.append(line)
        elif rows:
            break  # first non-table line after the table ends it

    if not rows:
        return None

    headers = split_cells(rows[0])
    body = rows[1:]
    if body and all(re.fullmatch(r":?-+:?", cell) for cell in split_cells(body[0])):
        body = body[1:]

    return pd.DataFrame([split_cells(row) for row in body], columns=headers)
def convert_md_table_to_csv(input_md_path, output_csv_path):
    """Extract the first table of a Markdown file and save it as CSV.

    A missing input file, a file without a table, or any error while
    reading, parsing or writing prints a message and exits with status 1.

    Args:
        input_md_path: Path of the Markdown file to read (UTF-8).
        output_csv_path: Path of the CSV file to write.
    """
    if not os.path.isfile(input_md_path):
        print(f"Error: The file '{input_md_path}' does not exist.")
        sys.exit(1)

    try:
        with open(input_md_path, 'r', encoding='utf-8') as source:
            table = extract_table_from_markdown(source.read())

        if table is None:
            print("Error: No table found in the Markdown file.")
            sys.exit(1)

        table.to_csv(output_csv_path, index=False)
        print(f"Conversion complete. Check the '{output_csv_path}' file.")
    except Exception as err:
        print(f"Error during Markdown table to CSV conversion: {err}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: input Markdown path followed by output CSV path.
    if len(sys.argv) != 3:
        print("Usage: python mdtabletocsv.py <input_md_path> <output_csv_path>")
        sys.exit(1)

    input_md_path, output_csv_path = sys.argv[1:]
    convert_md_table_to_csv(input_md_path, output_csv_path)

34
utils/mdtodocx.py Normal file
View File

@@ -0,0 +1,34 @@
#!/usr/bin/env python3
import pypandoc
import sys
import os
def convert_md_to_docx(input_md_path, output_docx_path):
    """Convert a Markdown file to DOCX via pypandoc.

    A missing input file, an unwritable output directory or any conversion
    error prints a message and exits with status 1.

    Args:
        input_md_path: Path of the Markdown file to convert.
        output_docx_path: Path of the DOCX file to create.
    """
    if not os.path.isfile(input_md_path):
        print(f"Error: The file '{input_md_path}' does not exist.")
        sys.exit(1)

    target_dir = os.path.dirname(output_docx_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        pypandoc.convert_file(input_md_path, 'docx', outputfile=output_docx_path)
    except Exception as err:
        print(f"Error during Markdown to DOCX conversion: {err}")
        sys.exit(1)
    else:
        print(f"Conversion complete. Check the '{output_docx_path}' file.")
if __name__ == "__main__":
    # Script entry point: input Markdown path followed by output DOCX path.
    if len(sys.argv) != 3:
        print("Usage: python mdtodocx.py <input_md_path> <output_docx_path>")
        sys.exit(1)

    input_md_path, output_docx_path = sys.argv[1:]
    convert_md_to_docx(input_md_path, output_docx_path)

55
utils/mdtohtml.py Normal file
View File

@@ -0,0 +1,55 @@
import markdown
import sys
import os
def convert_md_to_html(input_md_path, output_html_path):
    """Render a Markdown file to an HTML fragment file.

    A missing input file, an unwritable output directory, or a failure
    while reading, converting or writing prints a message and exits with
    status 1.

    Args:
        input_md_path: Path of the Markdown file to read (UTF-8).
        output_html_path: Path of the HTML file to write (UTF-8).
    """
    if not os.path.isfile(input_md_path):
        print(f"Error: The input Markdown file '{input_md_path}' does not exist.")
        sys.exit(1)

    target_dir = os.path.dirname(output_html_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    # Step 1: read the source text.
    try:
        with open(input_md_path, 'r', encoding='utf-8') as source:
            md_text = source.read()
    except FileNotFoundError:
        print(f"Error: The input Markdown file '{input_md_path}' was not found.")
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied when reading '{input_md_path}'.")
        sys.exit(1)
    except Exception as err:
        print(f"An unexpected error occurred while reading the Markdown file: {err}")
        sys.exit(1)

    # Step 2: render and save.
    try:
        rendered = markdown.markdown(md_text)
        with open(output_html_path, 'w', encoding='utf-8') as target:
            target.write(rendered)
        print(f"Conversion complete. Check the '{output_html_path}' file.")
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_html_path}'.")
        sys.exit(1)
    except Exception as err:
        print(f"An unexpected error occurred while writing the HTML file: {err}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: input Markdown path followed by output HTML path.
    if len(sys.argv) != 3:
        print("Usage: python mdtohtml.py <input_md_path> <output_html_path>")
        sys.exit(1)

    input_md_path, output_html_path = sys.argv[1:]
    convert_md_to_html(input_md_path, output_html_path)

45
utils/mdtopdf.py Normal file
View File

@@ -0,0 +1,45 @@
import markdown
import pdfkit
import sys
import os
def convert_md_to_pdf(input_md_path, output_pdf_path):
    """Render a Markdown file to PDF by way of HTML and pdfkit.

    A missing input file, an unwritable output directory, or a failure
    while reading or converting prints a message and exits with status 1.

    Args:
        input_md_path: Path of the Markdown file to read (UTF-8).
        output_pdf_path: Path of the PDF file to create.
    """
    if not os.path.isfile(input_md_path):
        print(f"Error: The file '{input_md_path}' does not exist.")
        sys.exit(1)

    target_dir = os.path.dirname(output_pdf_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        with open(input_md_path, 'r', encoding='utf-8') as source:
            md_text = source.read()
    except Exception as err:
        print(f"Error reading Markdown file: {err}")
        sys.exit(1)

    try:
        # Markdown -> HTML string -> PDF file.
        pdfkit.from_string(markdown.markdown(md_text), output_pdf_path)
        print(f"Conversion complete. Check the '{output_pdf_path}' file.")
    except Exception as err:
        print(f"Error during conversion to PDF: {err}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: input Markdown path followed by output PDF path.
    if len(sys.argv) != 3:
        print("Usage: python mdtopdf.py <input_md_path> <output_pdf_path>")
        sys.exit(1)

    input_md_path, output_pdf_path = sys.argv[1:]
    convert_md_to_pdf(input_md_path, output_pdf_path)

90
utils/mdtoyaml.py Normal file
View File

@@ -0,0 +1,90 @@
import yaml
import sys
import os
import re
def parse_markdown_to_dict(markdown_content):
    """Turn simple Markdown into a dict of sections keyed by header text.

    Every header line (``#`` through ``######`` followed by whitespace)
    starts a new section; header level is ignored and a repeated header
    replaces the earlier section. Inside a section:

    * ``*`` / ``-`` list items are collected under ``"items"``;
    * other non-blank lines are joined with spaces under ``"paragraph"``,
      unless a list has already started, in which case they are appended
      to the last list item as continuation text.

    Content that appears before the first header is stored in a section
    named ``"preamble"``.

    Args:
        markdown_content: Markdown text to parse.

    Returns:
        A dict mapping section names to dicts that may hold ``"paragraph"``
        (str) and ``"items"`` (list of str).
    """
    header_pattern = re.compile(r'^(#+)\s+(.*)')
    list_item_pattern = re.compile(r'^\s*[\*-]\s+')

    yaml_dict = {}
    current_section = None
    current_list = None

    for line in markdown_content.splitlines():
        stripped = line.strip()
        if not stripped:
            continue

        header_match = header_pattern.match(line)
        if header_match:
            current_section = {}
            yaml_dict[header_match.group(2).strip()] = current_section
            current_list = None  # a list never spans two sections
            continue

        if current_section is None:
            # Text before any header used to fail on the None section; give
            # it a section of its own instead.
            current_section = yaml_dict.setdefault("preamble", {})

        if list_item_pattern.match(line):
            if current_list is None:
                current_list = []
                current_section["items"] = current_list
            current_list.append(stripped.lstrip('*-').strip())
        elif current_list is not None:
            current_list[-1] += " " + stripped
        elif "paragraph" in current_section:
            current_section["paragraph"] += " " + stripped
        else:
            current_section["paragraph"] = stripped

    return yaml_dict
def convert_md_to_yaml(input_md_path, output_yaml_path):
    """Parse a Markdown file into sections and save them as YAML.

    A missing input file, an unwritable output directory, or a failure
    while reading or writing prints a message and exits with status 1.

    Args:
        input_md_path: Path of the Markdown file to read (UTF-8).
        output_yaml_path: Path of the YAML file to write (UTF-8).
    """
    if not os.path.isfile(input_md_path):
        print(f"Error: The file '{input_md_path}' does not exist.")
        sys.exit(1)

    target_dir = os.path.dirname(output_yaml_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        with open(input_md_path, 'r', encoding='utf-8') as source:
            md_text = source.read()
    except Exception as err:
        print(f"Error reading Markdown file: {err}")
        sys.exit(1)

    # Parsing happens outside the try blocks, as in the read/write split above.
    sections = parse_markdown_to_dict(md_text)

    try:
        with open(output_yaml_path, 'w', encoding='utf-8') as target:
            yaml.dump(sections, target, default_flow_style=False, sort_keys=False, allow_unicode=True)
        print(f"Conversion complete. Check the '{output_yaml_path}' file.")
    except Exception as err:
        print(f"Error writing YAML file: {err}")
        sys.exit(1)
if __name__ == "__main__":
    # Script entry point: input Markdown path followed by output YAML path.
    if len(sys.argv) != 3:
        print("Usage: python mdtoyaml.py <input_md_path> <output_yaml_path>")
        sys.exit(1)

    input_md_path, output_yaml_path = sys.argv[1:]
    convert_md_to_yaml(input_md_path, output_yaml_path)

50
utils/mergepdfs.py Normal file
View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python3
import sys
import os
from PyPDF2 import PdfMerger
def merge_pdfs(output_pdf, input_pdfs):
    """Merge the given PDF files, in order, into ``output_pdf``.

    Inputs that do not exist or cannot be appended are reported and skipped.
    If nothing could be appended, or writing the result fails, a message is
    printed and the process exits with status 1.

    Args:
        output_pdf: Path of the merged PDF to write.
        input_pdfs: Iterable of paths of the PDFs to concatenate.
    """
    merger = PdfMerger()

    # Append every usable input; problems with one file do not stop the rest.
    for source_path in input_pdfs:
        if os.path.isfile(source_path):
            try:
                merger.append(source_path)
                print(f"Added '{source_path}' to the merger.")
            except Exception as append_error:
                print(f"Error processing file '{source_path}': {append_error}")
        else:
            print(f"Error: The input file '{source_path}' does not exist.")

    # An empty page list means no input was usable.
    if not merger.pages:
        print("No valid PDFs to merge. Please check your input files.")
        sys.exit(1)

    try:
        merger.write(output_pdf)
        print(f"PDFs merged successfully into '{output_pdf}'")
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_pdf}'.")
        sys.exit(1)
    except Exception as write_error:
        print(f"An unexpected error occurred while saving the PDF: {write_error}")
        sys.exit(1)
    finally:
        # Runs on success and on the exit paths above alike.
        merger.close()
def main(args):
    """Command-line entry point for merging PDFs.

    Args:
        args: Argument list without the program name; the first element is
            the output path and the remaining elements are the input PDFs.
            Fewer than three elements prints the usage text and exits with
            status 1.
    """
    if len(args) >= 3:
        merge_pdfs(args[0], args[1:])
        return

    print("Usage: mergepdfs <output_pdf> <input_pdf1> <input_pdf2> ...")
    sys.exit(1)
if __name__ == "__main__":
    # Hand everything after the script name to the CLI entry point.
    cli_args = sys.argv[1:]
    main(cli_args)

23
utils/pdftojpg.py Normal file
View File

@@ -0,0 +1,23 @@
#!/usr/bin/env python3
import fitz # PyMuPDF
import sys
# Expect the source PDF and an output name prefix after the script name.
if len(sys.argv) == 3:
    input_path, output_path = sys.argv[1:]
else:
    print("Usage: pdftojpg <input_pdf_path> <output_jpg_path>")
    sys.exit(1)

try:
    pdf_document = fitz.open(input_path)

    # Render each page to its own image; files are numbered from 1 and the
    # output argument is used as a name prefix.
    for page_num in range(len(pdf_document)):
        page = pdf_document.load_page(page_num)
        pix = page.get_pixmap()

        output_file = f"{output_path}_page{page_num+1}.jpg"
        pix.save(output_file)
        print(f"Saved {output_file}")

    pdf_document.close()
except Exception as e:
    print(f"Error during conversion: {e}")
    sys.exit(1)

60
utils/pdftomd.py Executable file
View File

@@ -0,0 +1,60 @@
#!/usr/bin/env python3
import fitz # PyMuPDF
import sys
import os
def pdf_to_markdown(pdf_path, output_md_path):
    """Extract the text of a PDF into a Markdown file.

    The output starts with a ``# PDF Content`` heading followed by one
    ``## Page N`` section per page containing that page's extracted text.
    Every failure path prints a message and exits with status 1.

    Args:
        pdf_path: Path of the PDF to read.
        output_md_path: Path of the Markdown file to create or overwrite.
    """
    if not os.path.isfile(pdf_path):
        print(f"Error: The input PDF file '{pdf_path}' does not exist.")
        sys.exit(1)

    # Only an explicit directory component is checked for write access.
    target_dir = os.path.dirname(output_md_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        pdf_document = fitz.open(pdf_path)

        if pdf_document.page_count == 0:
            print("Error: The PDF file contains no pages.")
            pdf_document.close()
            sys.exit(1)

        # Collect the heading and one section per page, then join once.
        sections = ["# PDF Content\n\n"]
        for index in range(len(pdf_document)):
            page_text = pdf_document.load_page(index).get_text()
            sections.append(f"## Page {index + 1}\n\n{page_text}\n\n")
        pdf_document.close()
        md_content = "".join(sections)

        with open(output_md_path, 'w', encoding='utf-8') as md_file:
            md_file.write(md_content)

        print(f"Markdown file created at '{output_md_path}'")

    except FileNotFoundError:
        print(f"Error: The input PDF file '{pdf_path}' was not found.")
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_md_path}'.")
        sys.exit(1)
    except Exception as error:
        print(f"An unexpected error occurred: {error}")
        sys.exit(1)
if __name__ == "__main__":
    # Exactly two positional arguments are expected after the script name.
    if len(sys.argv) == 3:
        input_path, output_path = sys.argv[1:]
        pdf_to_markdown(input_path, output_path)
    else:
        print("Usage: pdftomd <input_pdf_path> <output_md_path>")
        sys.exit(1)

49
utils/pdftotext.py Normal file
View File

@@ -0,0 +1,49 @@
#!/usr/bin/env python3
import fitz # PyMuPDF
import sys
import os
def convert_pdf_to_text(input_pdf_path, output_txt_path):
    """Write the text of every page of a PDF to a plain-text file.

    Each page is preceded by a ``--- Page N ---`` marker line and followed by
    a newline.  Every failure path prints a message and exits with status 1.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_txt_path: Path of the text file to create or overwrite.
    """
    if not os.path.isfile(input_pdf_path):
        print(f"Error: The input PDF file '{input_pdf_path}' does not exist.")
        sys.exit(1)

    # Only an explicit directory component is checked for write access.
    target_dir = os.path.dirname(output_txt_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        pdf_document = fitz.open(input_pdf_path)

        if pdf_document.page_count == 0:
            print("Error: The PDF file contains no pages.")
            pdf_document.close()
            sys.exit(1)

        # Stream page by page so the whole document text is never held at once.
        with open(output_txt_path, 'w', encoding='utf-8') as txt_file:
            for index in range(pdf_document.page_count):
                page_text = pdf_document.load_page(index).get_text()
                txt_file.writelines(
                    [f"--- Page {index + 1} ---\n", page_text + "\n"]
                )

        pdf_document.close()
        print(f"Text extraction complete. Check the '{output_txt_path}' file.")
    except Exception as error:
        print(f"Error during PDF to text conversion: {error}")
        sys.exit(1)
if __name__ == "__main__":
    # Exactly two positional arguments are expected after the script name.
    if len(sys.argv) == 3:
        input_pdf_path, output_txt_path = sys.argv[1:]
        convert_pdf_to_text(input_pdf_path, output_txt_path)
    else:
        print("Usage: python pdftotext.py <input_pdf_path> <output_txt_path>")
        sys.exit(1)

42
utils/pngtojpg.py Normal file
View File

@@ -0,0 +1,42 @@
#!/usr/bin/env python3
from PIL import Image
import sys
import os
def convert_png_to_jpg(input_path, output_path):
    """Convert a PNG image to a JPEG file.

    The image is converted to RGB mode and saved in JPEG format.  Every
    failure path prints a message and exits with status 1.

    Args:
        input_path: Path of the PNG image to read.
        output_path: Path of the JPEG file to create or overwrite.
    """
    if not os.path.isfile(input_path):
        print(f"Error: The input PNG file '{input_path}' does not exist.")
        sys.exit(1)

    # Only an explicit directory component is checked for write access.
    target_dir = os.path.dirname(output_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        # Convert to RGB mode before saving as JPEG.
        rgb_image = Image.open(input_path).convert("RGB")
        rgb_image.save(output_path, "JPEG")
        print(f"Conversion complete. Check the '{output_path}' file.")
    except FileNotFoundError:
        print(f"Error: The input PNG file '{input_path}' was not found.")
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_path}'.")
        sys.exit(1)
    except Exception as error:
        print(f"An unexpected error occurred during conversion: {error}")
        sys.exit(1)
if __name__ == "__main__":
    # Exactly two positional arguments are expected after the script name.
    if len(sys.argv) == 3:
        input_path, output_path = sys.argv[1:]
        convert_png_to_jpg(input_path, output_path)
    else:
        print("Usage: pngtojpg <input_png_path> <output_jpg_path>")
        sys.exit(1)

49
utils/texttospeech.py Normal file
View File

@@ -0,0 +1,49 @@
from gtts import gTTS
from pydub import AudioSegment
import sys
import os
def convert_text_to_speech(input_text_path, output_audio_path, lang='en'):
    """Synthesize speech from a UTF-8 text file with gTTS.

    gTTS is asked to save MP3 data.  When ``output_audio_path`` has a
    ``.wav`` extension (matched case-insensitively) the MP3 is written to a
    private temporary file, converted to WAV with pydub, and the temporary
    file is removed afterwards.  For any other extension the MP3 data is
    saved directly to ``output_audio_path``.

    Every failure path prints a message and exits with status 1.

    Args:
        input_text_path: Path of the text file to read.
        output_audio_path: Path of the audio file to create or overwrite.
        lang: Language code passed to gTTS.
    """
    # Function-scope import: nothing else in this block's file needs it.
    import tempfile

    # Check if the input text file exists
    if not os.path.isfile(input_text_path):
        print(f"Error: The file '{input_text_path}' does not exist.")
        sys.exit(1)

    # Check if the output directory is writable
    output_dir = os.path.dirname(output_audio_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    # Decide from the real extension, not a substring match: a plain
    # str.replace('.wav', '.mp3') would also rewrite directory names that
    # contain '.wav' and would miss 'OUT.WAV'.
    extension = os.path.splitext(output_audio_path)[1].lower()
    wants_wav = extension == '.wav'

    try:
        # Read the text file content
        with open(input_text_path, 'r', encoding='utf-8') as file:
            text = file.read()

        # Report empty input clearly instead of relying on the library error.
        if not text.strip():
            print("Error: The input text file is empty.")
            sys.exit(1)

        # Convert text to speech
        tts = gTTS(text=text, lang=lang, slow=False)

        if wants_wav:
            # Use a private temporary MP3 so an unrelated '<name>.mp3' that
            # already sits next to the output is never overwritten or deleted.
            fd, temp_mp3_path = tempfile.mkstemp(suffix='.mp3')
            os.close(fd)
            try:
                tts.save(temp_mp3_path)
                sound = AudioSegment.from_mp3(temp_mp3_path)
                sound.export(output_audio_path, format="wav")
            finally:
                os.remove(temp_mp3_path)
            print(f"Converted '{input_text_path}' to '{output_audio_path}' as WAV format.")
        else:
            tts.save(output_audio_path)
            print(f"Converted '{input_text_path}' to '{output_audio_path}' as MP3 format.")
    except Exception as e:
        print(f"An unexpected error occurred during text-to-speech conversion: {e}")
        sys.exit(1)
if __name__ == "__main__":
    # Exactly two positional arguments are expected after the script name.
    if len(sys.argv) == 3:
        input_text_path, output_audio_path = sys.argv[1:]
        convert_text_to_speech(input_text_path, output_audio_path)
    else:
        print("Usage: python texttospeech.py <input_text_path> <output_audio_path>")
        sys.exit(1)

56
utils/videotoaudio.py Normal file
View File

@@ -0,0 +1,56 @@
from moviepy.editor import VideoFileClip
import sys
import os
def convert_video_to_audio(input_video_path, output_audio_path):
    """Extract the audio track of a video into an ``.mp3`` or ``.wav`` file.

    The output format is chosen from the (case-sensitive) suffix of
    ``output_audio_path``.  Every failure path prints a message and exits
    with status 1.

    Args:
        input_video_path: Path of the video file to read.
        output_audio_path: Path of the audio file to write; must end in
            ``.mp3`` or ``.wav``.
    """
    if not os.path.isfile(input_video_path):
        print(f"Error: The file '{input_video_path}' does not exist.")
        sys.exit(1)

    # Only an explicit directory component is checked for write access.
    target_dir = os.path.dirname(output_audio_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        video_clip = VideoFileClip(input_video_path)

        # A video without sound has nothing to extract.
        if video_clip.audio is None:
            print("Error: The video file does not contain an audio track.")
            video_clip.close()
            sys.exit(1)

        audio_clip = video_clip.audio

        # Pick the codec that matches the requested file suffix.
        if output_audio_path.endswith('.mp3'):
            codec_name = 'mp3'
        elif output_audio_path.endswith('.wav'):
            codec_name = 'pcm_s16le'
        else:
            print("Error: Unsupported output format. Please use .mp3 or .wav")
            sys.exit(1)

        audio_clip.write_audiofile(output_audio_path, codec=codec_name)

        # Release both clips once the audio has been written.
        audio_clip.close()
        video_clip.close()

        print(f"Conversion complete. Check the '{output_audio_path}' file.")
    except Exception as error:
        print(f"An unexpected error occurred during video-to-audio conversion: {error}")
        sys.exit(1)
if __name__ == "__main__":
    # Exactly two positional arguments are expected after the script name.
    if len(sys.argv) == 3:
        input_video_path, output_audio_path = sys.argv[1:]
        convert_video_to_audio(input_video_path, output_audio_path)
    else:
        print("Usage: python videotoaudio.py <input_video_path> <output_audio_path>")
        sys.exit(1)

41
utils/yamltojson.py Normal file
View File

@@ -0,0 +1,41 @@
import yaml
import json
import sys
import os
def convert_yaml_to_json(input_yaml_path, output_json_path):
    """Convert a YAML file to a JSON file indented by four spaces.

    The document is serialized to a string before the output file is opened,
    so a serialization failure does not leave a truncated JSON file behind.
    ``date`` and ``datetime`` values are written as ISO 8601 strings; other
    values that JSON cannot represent are still reported as an error.

    Every failure path prints a message and exits with status 1.

    Args:
        input_yaml_path: Path of the YAML file to read.
        output_json_path: Path of the JSON file to create or overwrite.
    """
    # Function-scope import: nothing else in this block's file needs it.
    import datetime

    def _json_default(value):
        # datetime.datetime is a subclass of datetime.date, so one check
        # covers both timestamp flavours.
        if isinstance(value, datetime.date):
            return value.isoformat()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    # Check if the input YAML file exists
    if not os.path.isfile(input_yaml_path):
        print(f"Error: The file '{input_yaml_path}' does not exist.")
        sys.exit(1)

    # Check if the output directory is writable
    output_dir = os.path.dirname(output_json_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        # Load the YAML content
        with open(input_yaml_path, 'r', encoding='utf-8') as yaml_file:
            yaml_content = yaml.safe_load(yaml_file)

        # Serialize first: if this raises, the output file is never touched.
        json_text = json.dumps(yaml_content, indent=4, default=_json_default)

        # Write the JSON content
        with open(output_json_path, 'w', encoding='utf-8') as json_file:
            json_file.write(json_text)

        print(f"Conversion complete. Check the '{output_json_path}' file.")
    except Exception as e:
        print(f"Error during YAML to JSON conversion: {e}")
        sys.exit(1)
if __name__ == "__main__":
    # Exactly two positional arguments are expected after the script name.
    if len(sys.argv) != 3:
        # The script is named yamltojson.py; the usage text previously
        # spelled it "yamlttojson.py".
        print("Usage: python yamltojson.py <input_yaml_path> <output_json_path>")
        sys.exit(1)

    input_yaml_path = sys.argv[1]
    output_json_path = sys.argv[2]
    convert_yaml_to_json(input_yaml_path, output_json_path)

68
utils/yamltomd.py Normal file
View File

@@ -0,0 +1,68 @@
import yaml
import sys
import os
def dict_to_markdown(data, indent=0):
    """Render parsed YAML data as a nested Markdown bullet list.

    Mappings become ``- **key**: value`` bullets.  A nested mapping is
    rendered two spaces deeper under a ``- **key**:`` header; a list is
    rendered four spaces deeper, with mapping items expanded recursively and
    every other item written as ``- item``.

    Top-level values that are not mappings are accepted as well: ``None``
    (an empty YAML document) yields an empty string, a list is rendered as
    bullets at ``indent``, and any other value becomes a single bullet.

    Args:
        data: The value to render, typically the result of loading YAML.
        indent: Number of leading spaces for the bullets of this level.

    Returns:
        The Markdown text; every emitted line ends with a newline.
    """
    if data is None:
        return ""
    if isinstance(data, list):
        return _list_to_markdown(data, indent)
    if not hasattr(data, "items"):
        # A bare scalar document is rendered as a single bullet.
        return " " * indent + f"- {data}\n"

    pad = " " * indent
    parts = []
    for key, value in data.items():
        if isinstance(value, dict):
            parts.append(f"{pad}- **{key}**:\n")
            parts.append(dict_to_markdown(value, indent + 2))
        elif isinstance(value, list):
            parts.append(f"{pad}- **{key}**:\n")
            parts.append(_list_to_markdown(value, indent + 4))
        else:
            parts.append(f"{pad}- **{key}**: {value}\n")
    return "".join(parts)


def _list_to_markdown(items, indent):
    """Render list items as bullets at ``indent``, expanding mapping items."""
    pad = " " * indent
    parts = []
    for item in items:
        if isinstance(item, dict):
            parts.append(dict_to_markdown(item, indent))
        else:
            parts.append(f"{pad}- {item}\n")
    return "".join(parts)
def convert_yaml_to_markdown(input_yaml_path, output_md_path):
    """Convert a YAML file into a Markdown bullet list.

    The YAML document is loaded with ``yaml.safe_load``, rendered with
    ``dict_to_markdown`` and written as UTF-8.  Every failure path, including
    a document that cannot be rendered, prints a message and exits with
    status 1.

    Args:
        input_yaml_path: Path of the YAML file to read.
        output_md_path: Path of the Markdown file to create or overwrite.
    """
    # Check if the input YAML file exists
    if not os.path.isfile(input_yaml_path):
        print(f"Error: The file '{input_yaml_path}' does not exist.")
        sys.exit(1)

    # Check if the output directory is writable
    output_dir = os.path.dirname(output_md_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        # Load the YAML content
        with open(input_yaml_path, 'r', encoding='utf-8') as file:
            yaml_content = yaml.safe_load(file)
    except yaml.YAMLError as e:
        print(f"Error parsing YAML file: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"Error reading YAML file: {e}")
        sys.exit(1)

    # Convert YAML to Markdown.  This step is guarded like the others so a
    # document the renderer cannot handle is reported as an error instead of
    # escaping as a traceback.
    try:
        markdown_content = dict_to_markdown(yaml_content)
    except Exception as e:
        print(f"Error converting YAML to Markdown: {e}")
        sys.exit(1)

    try:
        # Write the Markdown content to the output file
        with open(output_md_path, 'w', encoding='utf-8') as file:
            file.write(markdown_content)
        print(f"Conversion complete. Check the '{output_md_path}' file.")
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_md_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"Error writing Markdown file: {e}")
        sys.exit(1)
if __name__ == "__main__":
    # Exactly two positional arguments are expected after the script name.
    if len(sys.argv) == 3:
        input_yaml_path, output_md_path = sys.argv[1:]
        convert_yaml_to_markdown(input_yaml_path, output_md_path)
    else:
        print("Usage: python yamltomd.py <input_yaml_path> <output_md_path>")
        sys.exit(1)