Fixed up file paths and some gitignores, added version support; still working on a better install and binary compilation.
This commit is contained in:
86
utils/audiototext.py
Normal file
86
utils/audiototext.py
Normal file
@@ -0,0 +1,86 @@
|
||||
import speech_recognition as sr
|
||||
from pydub import AudioSegment
|
||||
import os
|
||||
import sys
|
||||
|
||||
def convert_audio_to_text(input_audio_path, output_text_path, chunk_length_ms=30000):
    """Transcribe an audio file to a text file, one fixed-length chunk at a time.

    MP3 input (``.mp3`` in any letter case) is first converted to WAV with
    pydub; any other input is opened as WAV directly. The audio is cut into
    chunks of ``chunk_length_ms`` milliseconds, each chunk is passed to
    ``recognizer.recognize_google`` and the recognised text is written to the
    output under a ``Chunk N:`` heading. Chunks whose speech cannot be
    recognised are reported on stdout and skipped.

    All intermediate files (the converted WAV and the per-chunk WAVs) live in
    a private temporary directory that is removed on every exit path, so the
    input file and any neighbouring files are never overwritten or deleted.

    Args:
        input_audio_path: Path to the source audio file.
        output_text_path: Path of the text file to write.
        chunk_length_ms: Length of each transcription chunk in milliseconds.

    Exits the process with status 1, after printing a message, when the input
    is missing, the output directory is not writable, the MP3 conversion
    fails, the recognition request fails, or any other error occurs.
    """
    import tempfile  # function-scope import; the module import block is left untouched

    # Check if the input audio file exists
    if not os.path.isfile(input_audio_path):
        print(f"Error: The file '{input_audio_path}' does not exist.")
        sys.exit(1)

    # Check if the output directory is writable
    output_dir = os.path.dirname(output_text_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    with tempfile.TemporaryDirectory() as work_dir:
        # Convert the audio to WAV format if needed
        wav_audio_path = input_audio_path
        if input_audio_path.lower().endswith('.mp3'):
            try:
                sound = AudioSegment.from_mp3(input_audio_path)
                # Build the WAV name from the stem so ".MP3" inputs cannot map
                # back onto the input path, and keep it inside work_dir so an
                # existing sibling .wav is never clobbered.
                stem = os.path.splitext(os.path.basename(input_audio_path))[0]
                wav_audio_path = os.path.join(work_dir, stem + '.wav')
                sound.export(wav_audio_path, format="wav")
                print(f"Converted '{input_audio_path}' to WAV format.")
            except Exception as e:
                print(f"Error converting MP3 to WAV: {e}")
                sys.exit(1)

        # Initialize the recognizer
        recognizer = sr.Recognizer()

        try:
            # Load the full audio file using pydub
            audio = AudioSegment.from_wav(wav_audio_path)
            total_ms = len(audio)

            # Stepping by chunk length avoids an empty trailing chunk when the
            # duration is an exact multiple of chunk_length_ms.
            transcribed_parts = []
            for i, start_time in enumerate(range(0, total_ms, chunk_length_ms)):
                end_time = min(start_time + chunk_length_ms, total_ms)
                chunk_path = os.path.join(work_dir, f"temp_chunk_{i}.wav")
                audio[start_time:end_time].export(chunk_path, format="wav")

                with sr.AudioFile(chunk_path) as source:
                    audio_data = recognizer.record(source)

                try:
                    # Transcribe the chunk
                    chunk_text = recognizer.recognize_google(audio_data)
                    transcribed_parts.append(f"Chunk {i + 1}:\n{chunk_text}\n\n")
                except sr.UnknownValueError:
                    print(f"Chunk {i + 1}: Unable to recognize speech.")
                except sr.RequestError as e:
                    print(f"Error with chunk {i + 1}: {e}")
                    sys.exit(1)

            # Save the transcribed text to the output file
            with open(output_text_path, 'w', encoding='utf-8') as file:
                file.write("".join(transcribed_parts))

            print(f"Transcription complete. Check the '{output_text_path}' file.")
        except Exception as e:
            print(f"An unexpected error occurred during transcription: {e}")
            sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two arguments are expected: the audio source and the text destination.
    if len(sys.argv) != 3:
        print("Usage: python audiototext.py <input_audio_path> <output_text_path>")
        sys.exit(1)

    input_audio_path, output_text_path = sys.argv[1:]
    convert_audio_to_text(input_audio_path, output_text_path)
|
||||
36
utils/csvmerge.py
Normal file
36
utils/csvmerge.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import pandas as pd
|
||||
import sys
|
||||
import os
|
||||
|
||||
def merge_csv_files(output_csv_path, common_key, *input_csv_paths):
    """Outer-join two or more CSV files on a shared column and save the result.

    The first file seeds the result; every following file is merged into it
    on ``common_key`` with ``how='outer'``, in the order given.

    Args:
        output_csv_path: Path of the merged CSV to write (without the index).
        common_key: Column name used as the join key.
        *input_csv_paths: Paths of the CSV files to merge; at least two.

    Exits the process with status 1, after printing a message, when fewer
    than two inputs are given or when reading, merging or writing fails.
    """
    if len(input_csv_paths) < 2:
        print("Error: Please provide at least two CSV files to merge.")
        sys.exit(1)

    first_path, *remaining_paths = input_csv_paths
    try:
        merged_df = pd.read_csv(first_path)

        # Fold each remaining file into the running result.
        for csv_path in remaining_paths:
            next_df = pd.read_csv(csv_path)
            merged_df = merged_df.merge(next_df, on=common_key, how='outer')

        merged_df.to_csv(output_csv_path, index=False)
        print(f"Merge complete. Check the '{output_csv_path}' file.")
    except Exception as e:
        print(f"Error during CSV merge: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Need the output path, the join key and at least one input path to proceed;
    # merge_csv_files itself enforces the two-input minimum.
    if len(sys.argv) < 4:
        print("Usage: python csvmerge.py <output_csv_path> <common_key> <input_csv1> <input_csv2> ...")
        sys.exit(1)

    output_csv_path, common_key, *input_csv_paths = sys.argv[1:]
    merge_csv_files(output_csv_path, common_key, *input_csv_paths)
|
||||
|
||||
43
utils/csvtoexcel.py
Normal file
43
utils/csvtoexcel.py
Normal file
@@ -0,0 +1,43 @@
|
||||
import pandas as pd
|
||||
import sys
|
||||
import os
|
||||
|
||||
def main():
    """Convert the CSV file named on the command line into an Excel file.

    Reads ``sys.argv`` for the input CSV path and the output Excel path.
    Every failure prints a message and exits the process with status 1.
    """
    def abort(message):
        # Report the problem and stop with a failure status.
        print(message)
        sys.exit(1)

    if len(sys.argv) != 3:
        abort("Usage: python csvtoexcel.py <input_csv_path> <output_excel_path.xlsx>")

    _, input_csv_path, output_excel_path = sys.argv

    # The source must be an existing regular file.
    if not os.path.isfile(input_csv_path):
        abort(f"Error: The input CSV file '{input_csv_path}' does not exist.")

    # An empty dirname means the current directory, which is not checked.
    output_dir = os.path.dirname(output_excel_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        abort(f"Error: The output directory '{output_dir}' is not writable.")

    try:
        frame = pd.read_csv(input_csv_path)
        frame.to_excel(output_excel_path, index=False)
        print(f"Conversion complete. Check the '{output_excel_path}' file.")
    except pd.errors.EmptyDataError:
        abort(f"Error: The input CSV file '{input_csv_path}' is empty.")
    except pd.errors.ParserError as e:
        abort(f"Error: Failed to parse CSV file '{input_csv_path}': {e}")
    except PermissionError:
        abort(f"Error: Permission denied when writing to '{output_excel_path}'.")
    except Exception as e:
        abort(f"Error during conversion: {e}")
|
||||
|
||||
# Run the CSV-to-Excel conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
54
utils/csvtojson.py
Normal file
54
utils/csvtojson.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import csv
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
|
||||
def main():
    """Convert the CSV file named on the command line into a JSON array.

    Reads ``sys.argv`` for the input CSV path and the output JSON path. Each
    CSV row becomes one object keyed by the header row; the result is written
    as UTF-8 JSON indented by four spaces. Every failure prints a message and
    exits the process with status 1.
    """
    def abort(message):
        # Report the problem and stop with a failure status.
        print(message)
        sys.exit(1)

    if len(sys.argv) != 3:
        abort("Usage: python csvtojson.py <input_csv_path> <output_json_path>")

    _, input_csv_path, output_json_path = sys.argv

    # The source must be an existing regular file.
    if not os.path.isfile(input_csv_path):
        abort(f"Error: The input CSV file '{input_csv_path}' does not exist.")

    # An empty dirname means the current directory, which is not checked.
    output_dir = os.path.dirname(output_json_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        abort(f"Error: The output directory '{output_dir}' is not writable.")

    # Phase 1: load every row into memory.
    try:
        with open(input_csv_path, 'r', newline='', encoding='utf-8') as csv_file:
            data = list(csv.DictReader(csv_file))
    except FileNotFoundError:
        abort(f"Error: The input CSV file '{input_csv_path}' was not found.")
    except csv.Error as e:
        abort(f"Error reading CSV file: {e}")
    except Exception as e:
        abort(f"An unexpected error occurred while reading the CSV file: {e}")

    # Phase 2: serialise the rows.
    try:
        with open(output_json_path, 'w', encoding='utf-8') as json_file:
            json.dump(data, json_file, indent=4, ensure_ascii=False)
        print(f"Conversion complete. Check the '{output_json_path}' file.")
    except PermissionError:
        abort(f"Error: Permission denied when writing to '{output_json_path}'.")
    except Exception as e:
        abort(f"An unexpected error occurred while writing the JSON file: {e}")
|
||||
|
||||
# Run the CSV-to-JSON conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
|
||||
43
utils/csvtoyaml.py
Normal file
43
utils/csvtoyaml.py
Normal file
@@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env python3
|
||||
import pandas as pd
|
||||
import yaml
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_csv_to_yaml(input_csv_path, output_yaml_path):
    """Convert a CSV file into a YAML list with one mapping per row.

    Args:
        input_csv_path: Path of the CSV file to read.
        output_yaml_path: Path of the YAML file to write (UTF-8, block style,
            column order preserved).

    Exits the process with status 1, after printing a message, when the input
    is missing, the output directory is not writable, or conversion fails.
    """
    # The source must be an existing regular file.
    if not os.path.isfile(input_csv_path):
        print(f"Error: The file '{input_csv_path}' does not exist.")
        sys.exit(1)

    # An empty dirname means the current directory, which is not checked.
    target_dir = os.path.dirname(output_yaml_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        # One dictionary per CSV row, keyed by column name.
        records = pd.read_csv(input_csv_path).to_dict(orient='records')

        with open(output_yaml_path, 'w', encoding='utf-8') as yaml_file:
            yaml.dump(
                records,
                yaml_file,
                default_flow_style=False,
                sort_keys=False,
                allow_unicode=True,
            )

        print(f"Conversion complete. Check the '{output_yaml_path}' file.")
    except Exception as e:
        print(f"Error during CSV to YAML conversion: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two arguments are expected: the CSV source and the YAML destination.
    if len(sys.argv) != 3:
        print("Usage: python csvtoyaml.py <input_csv_path> <output_yaml_path>")
        sys.exit(1)

    input_csv_path, output_yaml_path = sys.argv[1:]
    convert_csv_to_yaml(input_csv_path, output_yaml_path)
|
||||
44
utils/docxtomd.py
Normal file
44
utils/docxtomd.py
Normal file
@@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env python3
|
||||
import docx
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_docx_to_md(input_docx_path, output_md_path):
    """Write the paragraph text of a DOCX file to a Markdown file.

    Each paragraph's plain text is emitted followed by a blank line; no
    other formatting is carried over.

    Args:
        input_docx_path: Path of the DOCX file to read.
        output_md_path: Path of the Markdown file to write (UTF-8).

    Exits the process with status 1, after printing a message, when the input
    is missing, the output directory is not writable, or conversion fails.
    """
    # The source must be an existing regular file.
    if not os.path.isfile(input_docx_path):
        print(f"Error: The file '{input_docx_path}' does not exist.")
        sys.exit(1)

    # An empty dirname means the current directory, which is not checked.
    target_dir = os.path.dirname(output_md_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        document = docx.Document(input_docx_path)

        # Every paragraph is terminated by a blank line.
        md_content = "".join(para.text + "\n\n" for para in document.paragraphs)

        with open(output_md_path, 'w', encoding='utf-8') as md_file:
            md_file.write(md_content)

        print(f"Conversion complete. Check the '{output_md_path}' file.")
    except Exception as e:
        print(f"Error during DOCX to Markdown conversion: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two arguments are expected: the DOCX source and the Markdown destination.
    if len(sys.argv) != 3:
        print("Usage: python docxtomd.py <input_docx_path> <output_md_path>")
        sys.exit(1)

    input_docx_path, output_md_path = sys.argv[1:]
    convert_docx_to_md(input_docx_path, output_md_path)
|
||||
47
utils/exceltocsv.py
Normal file
47
utils/exceltocsv.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import pandas as pd
|
||||
import sys
|
||||
import os
|
||||
|
||||
def main():
    """Convert the Excel file named on the command line into a CSV file.

    Reads ``sys.argv`` for the input Excel path and the output CSV path. The
    workbook is read with the ``openpyxl`` engine and written without the
    index column. Every failure prints a message and exits the process with
    status 1.
    """
    if len(sys.argv) != 3:
        print("Usage: python exceltocsv.py <input_excel_path> <output_csv_path>")
        sys.exit(1)

    input_excel_path = sys.argv[1]
    output_csv_path = sys.argv[2]

    # Check if input Excel file exists
    if not os.path.isfile(input_excel_path):
        print(f"Error: The input Excel file '{input_excel_path}' does not exist.")
        sys.exit(1)

    # Check if the output directory is writable
    output_dir = os.path.dirname(output_csv_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        # Read the Excel file
        df = pd.read_excel(input_excel_path, engine='openpyxl')
        # Convert to CSV
        df.to_csv(output_csv_path, index=False)
        print(f"Conversion complete. Check the '{output_csv_path}' file.")
    except FileNotFoundError:
        print(f"Error: The input Excel file '{input_excel_path}' was not found.")
        sys.exit(1)
    except pd.errors.EmptyDataError:
        print(f"Error: The input Excel file '{input_excel_path}' is empty.")
        sys.exit(1)
    except ValueError as e:
        # pandas.errors has no ExcelFileError; naming it in an except clause
        # raised AttributeError as soon as the clause was evaluated. ValueError
        # takes over that "error reading the Excel file" slot, and must stay
        # after EmptyDataError, which is itself a ValueError subclass.
        print(f"Error reading the Excel file '{input_excel_path}': {e}")
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_csv_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred during conversion: {e}")
        sys.exit(1)
|
||||
|
||||
# Run the Excel-to-CSV conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
42
utils/exceltojson.py
Normal file
42
utils/exceltojson.py
Normal file
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env python3
|
||||
import pandas as pd
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_excel_to_json(input_excel_path, output_json_path):
    """Convert an Excel file into a JSON array with one object per row.

    Args:
        input_excel_path: Path of the Excel file to read (``openpyxl`` engine).
        output_json_path: Path of the JSON file to write (UTF-8, indented by
            four spaces).

    Exits the process with status 1, after printing a message, when the input
    is missing, the output directory is not writable, or conversion fails.
    """
    # The source must be an existing regular file.
    if not os.path.isfile(input_excel_path):
        print(f"Error: The file '{input_excel_path}' does not exist.")
        sys.exit(1)

    # An empty dirname means the current directory, which is not checked.
    target_dir = os.path.dirname(output_json_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        frame = pd.read_excel(input_excel_path, engine='openpyxl')
        # Serialise in memory first, then write the text out in one go.
        json_text = frame.to_json(orient='records', indent=4)

        with open(output_json_path, 'w', encoding='utf-8') as json_file:
            json_file.write(json_text)

        print(f"Conversion complete. Check the '{output_json_path}' file.")
    except Exception as e:
        print(f"Error during Excel to JSON conversion: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two arguments are expected: the Excel source and the JSON destination.
    if len(sys.argv) != 3:
        # The usage text names this script's actual file, exceltojson.py.
        print("Usage: python exceltojson.py <input_excel_path> <output_json_path>")
        sys.exit(1)

    input_excel_path = sys.argv[1]
    output_json_path = sys.argv[2]

    convert_excel_to_json(input_excel_path, output_json_path)
|
||||
54
utils/htmltomd.py
Normal file
54
utils/htmltomd.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import html2text
|
||||
import sys
|
||||
import os
|
||||
|
||||
def main():
    """Convert the HTML file named on the command line into Markdown.

    Reads ``sys.argv`` for the input HTML path and the output Markdown path.
    The HTML is read as UTF-8, converted with ``html2text.html2text`` and
    written as UTF-8. Every failure prints a message and exits the process
    with status 1.
    """
    if len(sys.argv) != 3:
        # The usage text names this script's actual file, htmltomd.py.
        print("Usage: python htmltomd.py <input_html_path> <output_md_path>")
        sys.exit(1)

    input_html_path = sys.argv[1]
    output_md_path = sys.argv[2]

    # Check if input HTML file exists
    if not os.path.isfile(input_html_path):
        print(f"Error: The input HTML file '{input_html_path}' does not exist.")
        sys.exit(1)

    # Check if the output directory is writable
    output_dir = os.path.dirname(output_md_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        # Read HTML file content
        with open(input_html_path, 'r', encoding='utf-8') as html_file:
            html_content = html_file.read()
    except FileNotFoundError:
        print(f"Error: The input HTML file '{input_html_path}' was not found.")
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied when reading '{input_html_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred while reading the HTML file: {e}")
        sys.exit(1)

    try:
        # Convert HTML to Markdown
        md_content = html2text.html2text(html_content)
        # Write Markdown content to output file
        with open(output_md_path, 'w', encoding='utf-8') as md_file:
            md_file.write(md_content)
        print(f"Conversion complete. Check the '{output_md_path}' file.")
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_md_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred while writing the Markdown file: {e}")
        sys.exit(1)
|
||||
|
||||
# Run the HTML-to-Markdown conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
|
||||
37
utils/htmltopdf.py
Normal file
37
utils/htmltopdf.py
Normal file
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python3
|
||||
import pdfkit
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_html_to_pdf(input_html_path, output_pdf_path):
    """Render an HTML file to PDF with pdfkit / wkhtmltopdf.

    The wkhtmltopdf executable is taken from ``/usr/bin/wkhtmltopdf`` when
    that file exists; otherwise the first ``wkhtmltopdf`` found on ``PATH``
    is used, so installations in other locations also work.

    Args:
        input_html_path: Path of the HTML file to read.
        output_pdf_path: Path of the PDF file to write.

    Exits the process with status 1, after printing a message, when the input
    is missing, the output directory is not writable, or conversion fails.
    """
    import shutil  # function-scope import; the module import block is left untouched

    # Check if the input HTML file exists
    if not os.path.isfile(input_html_path):
        print(f"Error: The file '{input_html_path}' does not exist.")
        sys.exit(1)

    # Check if the output directory is writable
    output_dir = os.path.dirname(output_pdf_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        # Prefer the historical fixed location; fall back to a PATH lookup.
        # If neither is found the original path is still passed on, so pdfkit
        # reports the missing executable exactly as before.
        wkhtmltopdf_path = '/usr/bin/wkhtmltopdf'
        if not os.path.isfile(wkhtmltopdf_path):
            wkhtmltopdf_path = shutil.which('wkhtmltopdf') or wkhtmltopdf_path

        config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf_path)
        pdfkit.from_file(input_html_path, output_pdf_path, configuration=config)

        print(f"Conversion complete. Check the '{output_pdf_path}' file.")
    except Exception as e:
        print(f"Error during HTML to PDF conversion: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two arguments are expected: the HTML source and the PDF destination.
    if len(sys.argv) != 3:
        print("Usage: python htmltopdf.py <input_html_path> <output_pdf_path>")
        sys.exit(1)

    input_html_path, output_pdf_path = sys.argv[1:]
    convert_html_to_pdf(input_html_path, output_pdf_path)
|
||||
|
||||
60
utils/imagetomd.py
Executable file
60
utils/imagetomd.py
Executable file
@@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env python3
|
||||
from PIL import Image, ImageEnhance
|
||||
import pytesseract
|
||||
import sys
|
||||
import os
|
||||
|
||||
def image_to_markdown(image_path, output_md_path):
    """OCR an image with Tesseract and save the text as a Markdown file.

    The image is converted to grayscale and its contrast is doubled before
    being passed to ``pytesseract.image_to_string`` with ``lang='eng'``. The
    output consists of an ``# Image Content`` heading followed by the
    recognised text.

    Args:
        image_path: Path of the image file to read.
        output_md_path: Path of the Markdown file to write (UTF-8).

    Exits the process with status 1, after printing a message, on any error.
    """
    def abort(message):
        # Report the problem and stop with a failure status.
        print(message)
        sys.exit(1)

    # The source must be an existing regular file.
    if not os.path.isfile(image_path):
        abort(f"Error: The input image file '{image_path}' does not exist.")

    # An empty dirname means the current directory, which is not checked.
    output_dir = os.path.dirname(output_md_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        abort(f"Error: The output directory '{output_dir}' is not writable.")

    try:
        # Preprocess: grayscale, then contrast factor 2.0.
        grayscale = Image.open(image_path).convert('L')
        prepared = ImageEnhance.Contrast(grayscale).enhance(2.0)

        recognised = pytesseract.image_to_string(prepared, lang='eng')
        md_content = "# Image Content\n\n" + recognised

        with open(output_md_path, 'w', encoding='utf-8') as md_file:
            md_file.write(md_content)

        print(f"Markdown file created at '{output_md_path}'")
    except FileNotFoundError:
        abort(f"Error: The input image file '{image_path}' was not found.")
    except PermissionError:
        abort(f"Error: Permission denied when writing to '{output_md_path}'.")
    except pytesseract.pytesseract.TesseractError as e:
        abort(f"Error processing image with Tesseract: {e}")
    except Exception as e:
        abort(f"An unexpected error occurred: {e}")
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two arguments are expected: the image source and the Markdown destination.
    if len(sys.argv) != 3:
        print("Usage: imagetomd <input_image_path> <output_md_path>")
        sys.exit(1)

    input_path, output_path = sys.argv[1:]
    image_to_markdown(input_path, output_path)
|
||||
|
||||
55
utils/jpgstopdf.py
Normal file
55
utils/jpgstopdf.py
Normal file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python3
|
||||
from PIL import Image
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_images_to_pdf(image_paths, output_pdf_path):
    """Combine image files into a single multi-page PDF.

    Inputs that do not exist or cannot be opened are reported and skipped;
    the remaining images become the pages, in the order given. RGBA images
    are converted to RGB before saving.

    Args:
        image_paths: Iterable of image file paths.
        output_pdf_path: Path of the PDF file to write.

    Exits the process with status 1, after printing a message, when no image
    could be loaded, the output directory is not writable, or saving fails.
    """
    images = []

    for image_path in image_paths:
        if os.path.isfile(image_path):
            try:
                page = Image.open(image_path)
                images.append(page.convert('RGB') if page.mode == 'RGBA' else page)
            except Exception as e:
                print(f"Error processing image '{image_path}': {e}")
        else:
            print(f"Error: The input image file '{image_path}' does not exist.")

    # Nothing usable was loaded.
    if not images:
        print("No valid images to convert. Please check your input files.")
        sys.exit(1)

    # An empty dirname means the current directory, which is not checked.
    target_dir = os.path.dirname(output_pdf_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    first_page, *other_pages = images
    try:
        # The first image drives the save; the rest are appended as pages.
        first_page.save(output_pdf_path, save_all=True, append_images=other_pages)
        print(f"PDF created at '{output_pdf_path}'")
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_pdf_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred while saving the PDF: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Need the PDF destination plus at least one image path.
    if len(sys.argv) < 3:
        print("Usage: jpgstopdf <output_pdf_path> <input_jpg_path1> <input_jpg_path2> ...")
        sys.exit(1)

    output_pdf_path, *image_paths = sys.argv[1:]
    convert_images_to_pdf(image_paths, output_pdf_path)
|
||||
|
||||
42
utils/jpgtopng.py
Normal file
42
utils/jpgtopng.py
Normal file
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env python3
|
||||
from PIL import Image
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_jpg_to_png(input_path, output_path):
    """Re-encode an image file as PNG.

    The image is opened with Pillow, converted to RGB mode and saved in PNG
    format.

    Args:
        input_path: Path of the source image.
        output_path: Path of the PNG file to write.

    Exits the process with status 1, after printing a message, on any error.
    """
    def abort(message):
        # Report the problem and stop with a failure status.
        print(message)
        sys.exit(1)

    # The source must be an existing regular file.
    if not os.path.isfile(input_path):
        abort(f"Error: The input JPG file '{input_path}' does not exist.")

    # An empty dirname means the current directory, which is not checked.
    target_dir = os.path.dirname(output_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        abort(f"Error: The output directory '{target_dir}' is not writable.")

    try:
        rgb_image = Image.open(input_path).convert("RGB")
        rgb_image.save(output_path, "PNG")
        print(f"Conversion complete. Check the '{output_path}' file.")
    except FileNotFoundError:
        abort(f"Error: The input JPG file '{input_path}' was not found.")
    except PermissionError:
        abort(f"Error: Permission denied when writing to '{output_path}'.")
    except Exception as e:
        abort(f"An unexpected error occurred during conversion: {e}")
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two arguments are expected: the JPG source and the PNG destination.
    if len(sys.argv) != 3:
        print("Usage: jpgtopng <input_jpg_path> <output_png_path>")
        sys.exit(1)

    input_path, output_path = sys.argv[1:]
    convert_jpg_to_png(input_path, output_path)
|
||||
62
utils/jsontocsv.py
Normal file
62
utils/jsontocsv.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import json
|
||||
import csv
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_json_to_csv(input_json_path, output_csv_path):
    """Convert a JSON array of objects into a CSV file.

    The keys of the first object define the CSV header and column order.
    For every object, missing keys become empty cells and keys not present
    in the first object are left out.

    Args:
        input_json_path: Path of the JSON file to read (UTF-8).
        output_csv_path: Path of the CSV file to write (UTF-8).

    Exits the process with status 1, after printing a message, when the input
    is missing, the output directory is not writable, the JSON is invalid or
    is not a non-empty list, or writing fails.
    """
    def abort(message):
        # Report the problem and stop with a failure status.
        print(message)
        sys.exit(1)

    # The source must be an existing regular file.
    if not os.path.isfile(input_json_path):
        abort(f"Error: The input JSON file '{input_json_path}' does not exist.")

    # An empty dirname means the current directory, which is not checked.
    target_dir = os.path.dirname(output_csv_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        abort(f"Error: The output directory '{target_dir}' is not writable.")

    try:
        with open(input_json_path, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)

        # Only a non-empty top-level list is accepted.
        if not (isinstance(data, list) and data):
            abort("Error: The JSON file must contain a list of objects.")
    except json.JSONDecodeError:
        abort(f"Error: Failed to decode JSON file '{input_json_path}'. Please check if the file is valid JSON.")
    except Exception as e:
        abort(f"Error reading JSON file: {e}")

    try:
        with open(output_csv_path, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.writer(csv_file)

            # Header row comes from the first object's keys.
            headers = data[0].keys()
            writer.writerow(headers)

            # One row per object, empty string where a key is absent.
            writer.writerows(
                [record.get(column, "") for column in headers] for record in data
            )

        print(f"Conversion complete. Check the '{output_csv_path}' file.")
    except PermissionError:
        abort(f"Error: Permission denied when writing to '{output_csv_path}'.")
    except Exception as e:
        abort(f"Error writing CSV file: {e}")
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two arguments are expected: the JSON source and the CSV destination.
    if len(sys.argv) != 3:
        print("Usage: python jsontocsv.py <input_json_path> <output_csv_path>")
        sys.exit(1)

    input_json_path, output_csv_path = sys.argv[1:]
    convert_json_to_csv(input_json_path, output_csv_path)
|
||||
41
utils/jsontoexcel.py
Normal file
41
utils/jsontoexcel.py
Normal file
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python3
|
||||
import pandas as pd
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_json_to_excel(input_json_path, output_excel_path):
    """Convert a JSON file into an Excel file.

    The JSON is loaded with ``pd.read_json`` and written with the
    ``openpyxl`` engine, without the index column.

    Args:
        input_json_path: Path of the JSON file to read.
        output_excel_path: Path of the Excel file to write.

    Exits the process with status 1, after printing a message, when the input
    is missing, the output directory is not writable, or conversion fails.
    """
    # The source must be an existing regular file.
    if not os.path.isfile(input_json_path):
        print(f"Error: The file '{input_json_path}' does not exist.")
        sys.exit(1)

    # An empty dirname means the current directory, which is not checked.
    target_dir = os.path.dirname(output_excel_path)
    if target_dir and not os.access(target_dir, os.W_OK):
        print(f"Error: The output directory '{target_dir}' is not writable.")
        sys.exit(1)

    try:
        frame = pd.read_json(input_json_path)
        frame.to_excel(output_excel_path, index=False, engine='openpyxl')
        print(f"Conversion complete. Check the '{output_excel_path}' file.")
    except ValueError:
        # Any ValueError raised while reading or writing is reported this way.
        print(f"Error: Invalid JSON format in the file '{input_json_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"Error during JSON to Excel conversion: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two arguments are expected: the JSON source and the Excel destination.
    if len(sys.argv) != 3:
        print("Usage: python jsontoexcel.py <input_json_path> <output_excel_path>")
        sys.exit(1)

    input_json_path, output_excel_path = sys.argv[1:]
    convert_json_to_excel(input_json_path, output_excel_path)
|
||||
40
utils/jsontoyaml.py
Normal file
40
utils/jsontoyaml.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import json
|
||||
import yaml
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_json_to_yaml(input_json_path, output_yaml_path):
    """Convert a JSON file into a YAML file.

    The document is written in block style with key order preserved. Because
    the output file is opened as UTF-8, ``allow_unicode=True`` is passed so
    non-ASCII text is written literally rather than as escape sequences.

    Args:
        input_json_path: Path of the JSON file to read (UTF-8).
        output_yaml_path: Path of the YAML file to write (UTF-8).

    Exits the process with status 1, after printing a message, when the input
    is missing, the output directory is not writable, or conversion fails.
    """
    # Check if the input JSON file exists
    if not os.path.isfile(input_json_path):
        print(f"Error: The file '{input_json_path}' does not exist.")
        sys.exit(1)

    # Check if the output directory is writable
    output_dir = os.path.dirname(output_yaml_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        # Load the JSON content
        with open(input_json_path, 'r', encoding='utf-8') as json_file:
            json_content = json.load(json_file)

        # Write the YAML content
        with open(output_yaml_path, 'w', encoding='utf-8') as yaml_file:
            yaml.dump(
                json_content,
                yaml_file,
                default_flow_style=False,
                sort_keys=False,
                allow_unicode=True,
            )

        print(f"Conversion complete. Check the '{output_yaml_path}' file.")
    except Exception as e:
        print(f"Error during JSON to YAML conversion: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two arguments are expected: the JSON source and the YAML destination.
    if len(sys.argv) != 3:
        print("Usage: python jsontoyaml.py <input_json_path> <output_yaml_path>")
        sys.exit(1)

    input_json_path, output_yaml_path = sys.argv[1:]
    convert_json_to_yaml(input_json_path, output_yaml_path)
|
||||
61
utils/mdtabletocsv.py
Normal file
61
utils/mdtabletocsv.py
Normal file
@@ -0,0 +1,61 @@
|
||||
import pandas as pd
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
def extract_table_from_markdown(md_content):
    """Extract the first pipe-delimited table in a Markdown string.

    The first run of consecutive lines that start and end with ``|`` is
    taken as the table. Its first line supplies the column headers. If the
    second line is a delimiter row (cells made of dashes with optional
    leading/trailing colons, e.g. ``|---|:--:|``) it is dropped; otherwise it
    is kept as data. Data rows are normalised to the header width: short rows
    are padded with empty strings and surplus cells are discarded.

    Args:
        md_content: Markdown text to search.

    Returns:
        A ``pandas.DataFrame`` of string cells, or ``None`` when the text
        contains no table.
    """
    table_pattern = re.compile(r"(\|[^\n]+\|\n?)+")
    match = table_pattern.search(md_content)

    if not match:
        return None

    def split_cells(line):
        # Drop the empty fragments outside the outer pipes, trim each cell.
        return [cell.strip() for cell in line.split('|')[1:-1]]

    rows = [split_cells(line) for line in match.group(0).strip().splitlines()]
    headers, body = rows[0], rows[1:]

    # Skip the second line only when it really is the header delimiter row;
    # skipping it unconditionally would silently lose a data row.
    if body and body[0] and all(re.fullmatch(r":?-+:?", cell) for cell in body[0]):
        body = body[1:]

    # Pad or truncate ragged rows so DataFrame construction cannot fail on a
    # column-count mismatch.
    width = len(headers)
    data_rows = [(row + [""] * width)[:width] for row in body]

    return pd.DataFrame(data_rows, columns=headers)
|
||||
|
||||
def convert_md_table_to_csv(input_md_path, output_csv_path):
    """Write the first table found in a Markdown file to a CSV file.

    Args:
        input_md_path: Path of the Markdown file to read (UTF-8).
        output_csv_path: Path of the CSV file to write (without the index).

    Exits the process with status 1, after printing a message, when the input
    is missing, no table is found, or conversion fails.
    """
    # The source must be an existing regular file.
    if not os.path.isfile(input_md_path):
        print(f"Error: The file '{input_md_path}' does not exist.")
        sys.exit(1)

    try:
        with open(input_md_path, 'r', encoding='utf-8') as md_file:
            markdown_text = md_file.read()

        table = extract_table_from_markdown(markdown_text)

        # None means the text contained nothing that looks like a table.
        if table is None:
            print("Error: No table found in the Markdown file.")
            sys.exit(1)

        table.to_csv(output_csv_path, index=False)
        print(f"Conversion complete. Check the '{output_csv_path}' file.")
    except Exception as e:
        print(f"Error during Markdown table to CSV conversion: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two arguments are expected: the Markdown source and the CSV destination.
    if len(sys.argv) != 3:
        print("Usage: python mdtabletocsv.py <input_md_path> <output_csv_path>")
        sys.exit(1)

    input_md_path, output_csv_path = sys.argv[1:]
    convert_md_table_to_csv(input_md_path, output_csv_path)
|
||||
34
utils/mdtodocx.py
Normal file
34
utils/mdtodocx.py
Normal file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python3
|
||||
import pypandoc
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_md_to_docx(input_md_path, output_docx_path):
    """Convert a Markdown file to a DOCX document via pypandoc.

    Args:
        input_md_path: Path of the Markdown file to convert.
        output_docx_path: Path of the DOCX file to produce.

    Exits the process with status 1 when the input file is missing, when
    the output directory is not writable, or when the conversion raises.
    """
    # Guard clause: the source must be an existing regular file.
    if not os.path.isfile(input_md_path):
        print(f"Error: The file '{input_md_path}' does not exist.")
        sys.exit(1)

    # An empty dirname means "current directory"; that case is not checked.
    output_dir = os.path.dirname(output_docx_path)
    dir_is_usable = (not output_dir) or os.access(output_dir, os.W_OK)
    if not dir_is_usable:
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        # pypandoc writes the result straight to the given output file.
        pypandoc.convert_file(input_md_path, 'docx', outputfile=output_docx_path)
        print(f"Conversion complete. Check the '{output_docx_path}' file.")
    except Exception as e:
        print(f"Error during Markdown to DOCX conversion: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are required: source and destination.
    if len(sys.argv) != 3:
        print("Usage: python mdtodocx.py <input_md_path> <output_docx_path>")
        sys.exit(1)

    input_md_path, output_docx_path = sys.argv[1:3]
    convert_md_to_docx(input_md_path, output_docx_path)
|
||||
55
utils/mdtohtml.py
Normal file
55
utils/mdtohtml.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import markdown
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_md_to_html(input_md_path, output_html_path):
    """Render a Markdown file to HTML and write the result to disk.

    Args:
        input_md_path: Path of the Markdown file to read (UTF-8).
        output_html_path: Path of the HTML file to write (UTF-8).

    Exits the process with status 1 on a missing input file, a
    non-writable output directory, or any read/convert/write failure.
    """
    # Guard clause: the source must be an existing regular file.
    if not os.path.isfile(input_md_path):
        print(f"Error: The input Markdown file '{input_md_path}' does not exist.")
        sys.exit(1)

    # An empty dirname means "current directory"; that case is not checked.
    output_dir = os.path.dirname(output_html_path)
    dir_is_usable = (not output_dir) or os.access(output_dir, os.W_OK)
    if not dir_is_usable:
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    # Phase 1: read the Markdown source.
    try:
        with open(input_md_path, 'r', encoding='utf-8') as md_file:
            md_content = md_file.read()
    except FileNotFoundError:
        print(f"Error: The input Markdown file '{input_md_path}' was not found.")
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied when reading '{input_md_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred while reading the Markdown file: {e}")
        sys.exit(1)

    # Phase 2: convert and write the HTML.
    try:
        html_content = markdown.markdown(md_content)
        with open(output_html_path, 'w', encoding='utf-8') as html_file:
            html_file.write(html_content)
        print(f"Conversion complete. Check the '{output_html_path}' file.")
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_html_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred while writing the HTML file: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are required: source and destination.
    if len(sys.argv) != 3:
        print("Usage: python mdtohtml.py <input_md_path> <output_html_path>")
        sys.exit(1)

    input_md_path, output_html_path = sys.argv[1:3]
    convert_md_to_html(input_md_path, output_html_path)
|
||||
45
utils/mdtopdf.py
Normal file
45
utils/mdtopdf.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import markdown
|
||||
import pdfkit
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_md_to_pdf(input_md_path, output_pdf_path):
    """Convert a Markdown file to PDF by way of an intermediate HTML string.

    Args:
        input_md_path: Path of the Markdown file to read (UTF-8).
        output_pdf_path: Path of the PDF file to produce.

    Exits the process with status 1 on a missing input file, a
    non-writable output directory, or any read/conversion failure.
    """
    # Guard clause: the source must be an existing regular file.
    if not os.path.isfile(input_md_path):
        print(f"Error: The file '{input_md_path}' does not exist.")
        sys.exit(1)

    # An empty dirname means "current directory"; that case is not checked.
    output_dir = os.path.dirname(output_pdf_path)
    dir_is_usable = (not output_dir) or os.access(output_dir, os.W_OK)
    if not dir_is_usable:
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    # Phase 1: read the Markdown source.
    try:
        with open(input_md_path, 'r', encoding='utf-8') as file:
            markdown_content = file.read()
    except Exception as e:
        print(f"Error reading Markdown file: {e}")
        sys.exit(1)

    # Phase 2: Markdown -> HTML -> PDF.
    try:
        html_content = markdown.markdown(markdown_content)
        pdfkit.from_string(html_content, output_pdf_path)
        print(f"Conversion complete. Check the '{output_pdf_path}' file.")
    except Exception as e:
        print(f"Error during conversion to PDF: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are required: source and destination.
    if len(sys.argv) != 3:
        print("Usage: python mdtopdf.py <input_md_path> <output_pdf_path>")
        sys.exit(1)

    input_md_path, output_pdf_path = sys.argv[1:3]
    convert_md_to_pdf(input_md_path, output_pdf_path)
|
||||
90
utils/mdtoyaml.py
Normal file
90
utils/mdtoyaml.py
Normal file
@@ -0,0 +1,90 @@
|
||||
import yaml
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
|
||||
def parse_markdown_to_dict(markdown_content):
    """Convert Markdown text into a dictionary keyed by header text.

    Every header line (one or more ``#`` followed by whitespace) starts a
    new section stored under the header's title, regardless of its level.
    Within a section:

    * bullet lines (``*`` or ``-``) are collected in an ``"items"`` list;
    * a non-bullet line that follows a bullet is appended to that bullet
      as a continuation;
    * other non-empty lines are joined with single spaces into a
      ``"paragraph"`` string.

    Content that appears before the first header is stored directly on the
    returned dictionary under the same ``"items"`` / ``"paragraph"`` keys.
    (Such input used to raise ``TypeError`` because no section existed yet.)
    A later header with the same title as an existing key replaces it.

    Args:
        markdown_content: The Markdown document as a single string.

    Returns:
        dict: Mapping of header titles to section dictionaries, plus any
        pre-header ``"items"`` / ``"paragraph"`` entries.
    """
    header_pattern = re.compile(r'^(#+)\s+(.*)')
    bullet_pattern = re.compile(r'^\s*[\*-]\s+(.*)')

    yaml_dict = {}
    # Until the first header is seen, content attaches to the root mapping.
    current_section = yaml_dict
    current_list = None

    for line in markdown_content.splitlines():
        stripped = line.strip()
        if not stripped:
            continue

        header_match = header_pattern.match(line)
        if header_match:
            current_section = {}
            yaml_dict[header_match.group(2).strip()] = current_section
            current_list = None  # A new section starts with no open list.
            continue

        bullet_match = bullet_pattern.match(line)
        if bullet_match:
            if current_list is None:
                current_list = []
                current_section["items"] = current_list
            current_list.append(bullet_match.group(1).strip())
            continue

        if current_list is not None:
            # Plain text right after a bullet continues that bullet.
            current_list[-1] += " " + stripped
        elif "paragraph" in current_section:
            current_section["paragraph"] += " " + stripped
        else:
            current_section["paragraph"] = stripped

    return yaml_dict
|
||||
|
||||
def convert_md_to_yaml(input_md_path, output_yaml_path):
    """Parse a Markdown file into sections and dump them as YAML.

    Args:
        input_md_path: Path of the Markdown file to read (UTF-8).
        output_yaml_path: Path of the YAML file to write (UTF-8).

    Exits the process with status 1 on a missing input file, a
    non-writable output directory, or a read/write failure.
    """
    # Guard clause: the source must be an existing regular file.
    if not os.path.isfile(input_md_path):
        print(f"Error: The file '{input_md_path}' does not exist.")
        sys.exit(1)

    # An empty dirname means "current directory"; that case is not checked.
    output_dir = os.path.dirname(output_yaml_path)
    dir_is_usable = (not output_dir) or os.access(output_dir, os.W_OK)
    if not dir_is_usable:
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    # Phase 1: read the Markdown source.
    try:
        with open(input_md_path, 'r', encoding='utf-8') as file:
            markdown_content = file.read()
    except Exception as e:
        print(f"Error reading Markdown file: {e}")
        sys.exit(1)

    # Phase 2: build the section dictionary (errors here are not caught).
    yaml_data = parse_markdown_to_dict(markdown_content)

    # Phase 3: serialise in block style, keeping insertion order and
    # writing non-ASCII characters unescaped.
    try:
        with open(output_yaml_path, 'w', encoding='utf-8') as file:
            yaml.dump(yaml_data, file, default_flow_style=False, sort_keys=False, allow_unicode=True)
        print(f"Conversion complete. Check the '{output_yaml_path}' file.")
    except Exception as e:
        print(f"Error writing YAML file: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are required: source and destination.
    if len(sys.argv) != 3:
        print("Usage: python mdtoyaml.py <input_md_path> <output_yaml_path>")
        sys.exit(1)

    input_md_path, output_yaml_path = sys.argv[1:3]
    convert_md_to_yaml(input_md_path, output_yaml_path)
|
||||
50
utils/mergepdfs.py
Normal file
50
utils/mergepdfs.py
Normal file
@@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import os
|
||||
from PyPDF2 import PdfMerger
|
||||
|
||||
def merge_pdfs(output_pdf, input_pdfs):
    """Merge several PDF files into one output PDF.

    Inputs that do not exist or cannot be appended are reported and
    skipped; the merge continues with the remaining files.

    Args:
        output_pdf: Path of the merged PDF to write.
        input_pdfs: Iterable of paths of the PDFs to append, in order.

    Exits the process with status 1 when no input contributed any page or
    when writing the output fails.
    """
    merger = PdfMerger()
    # One outer try/finally so the merger is closed on every exit path,
    # including the early "nothing to merge" exit.
    try:
        # Validate and add each input PDF.
        for pdf in input_pdfs:
            if not os.path.isfile(pdf):
                print(f"Error: The input file '{pdf}' does not exist.")
                continue
            try:
                merger.append(pdf)
                print(f"Added '{pdf}' to the merger.")
            except Exception as e:
                print(f"Error processing file '{pdf}': {e}")

        # Check if any valid PDFs were loaded.
        if not merger.pages:
            print("No valid PDFs to merge. Please check your input files.")
            sys.exit(1)

        # Attempt to write the output PDF.
        try:
            merger.write(output_pdf)
            print(f"PDFs merged successfully into '{output_pdf}'")
        except PermissionError:
            print(f"Error: Permission denied when writing to '{output_pdf}'.")
            sys.exit(1)
        except Exception as e:
            print(f"An unexpected error occurred while saving the PDF: {e}")
            sys.exit(1)
    finally:
        merger.close()
|
||||
|
||||
def main(args):
    """Command-line entry point for merging PDFs.

    Args:
        args: Argument list without the program name; the first element is
            the output path and the rest are the input PDFs.

    Exits the process with status 1 when fewer than three arguments are
    supplied.
    """
    enough_arguments = len(args) >= 3
    if not enough_arguments:
        print("Usage: mergepdfs <output_pdf> <input_pdf1> <input_pdf2> ...")
        sys.exit(1)

    merge_pdfs(args[0], args[1:])
|
||||
|
||||
if __name__ == "__main__":
    # Hand everything after the program name to the entry point.
    main(sys.argv[1:])
|
||||
|
||||
23
utils/pdftojpg.py
Normal file
23
utils/pdftojpg.py
Normal file
@@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env python3
|
||||
import fitz # PyMuPDF
|
||||
import sys
|
||||
|
||||
# Script body: render every page of a PDF to its own JPG file.
if len(sys.argv) != 3:
    print("Usage: pdftojpg <input_pdf_path> <output_jpg_path>")
    sys.exit(1)

input_path = sys.argv[1]
output_path = sys.argv[2]

try:
    pdf_document = fitz.open(input_path)
    try:
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            pix = page.get_pixmap()
            # The output argument is used as a prefix: each page is saved as
            # "<output_path>_page<N>.jpg", with N starting at 1.
            output_file = f"{output_path}_page{page_num+1}.jpg"
            pix.save(output_file)
            print(f"Saved {output_file}")
    finally:
        # Close the document even when rendering or saving a page fails.
        pdf_document.close()
except Exception as e:
    print(f"Error during conversion: {e}")
    sys.exit(1)
|
||||
60
utils/pdftomd.py
Executable file
60
utils/pdftomd.py
Executable file
@@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env python3
|
||||
import fitz # PyMuPDF
|
||||
import sys
|
||||
import os
|
||||
|
||||
def pdf_to_markdown(pdf_path, output_md_path):
    """Extract the text of a PDF into a Markdown file, one section per page.

    The output starts with a ``# PDF Content`` title followed by a
    ``## Page N`` heading and the extracted text for every page.

    Args:
        pdf_path: Path of the PDF file to read.
        output_md_path: Path of the Markdown file to write (UTF-8).

    Exits the process with status 1 on a missing input file, a
    non-writable output directory, a PDF without pages, or any failure
    while reading or writing.
    """
    # Check if the input PDF file exists.
    if not os.path.isfile(pdf_path):
        print(f"Error: The input PDF file '{pdf_path}' does not exist.")
        sys.exit(1)

    # Check if the output directory is writable (empty dirname = cwd).
    output_dir = os.path.dirname(output_md_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        pdf_document = fitz.open(pdf_path)
        try:
            if pdf_document.page_count == 0:
                print("Error: The PDF file contains no pages.")
                sys.exit(1)

            # Collect the pieces and join once instead of repeated "+=".
            parts = ["# PDF Content\n\n"]
            for page_num in range(len(pdf_document)):
                text = pdf_document.load_page(page_num).get_text()
                parts.append(f"## Page {page_num + 1}\n\n{text}\n\n")
        finally:
            # Single close point covering success, early exit and errors.
            pdf_document.close()

        # Write the Markdown content to the output file.
        with open(output_md_path, 'w', encoding='utf-8') as file:
            file.write("".join(parts))

        print(f"Markdown file created at '{output_md_path}'")
    except FileNotFoundError:
        print(f"Error: The input PDF file '{pdf_path}' was not found.")
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_md_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are required: source and destination.
    if len(sys.argv) != 3:
        print("Usage: pdftomd <input_pdf_path> <output_md_path>")
        sys.exit(1)

    input_path, output_path = sys.argv[1:3]
    pdf_to_markdown(input_path, output_path)
|
||||
49
utils/pdftotext.py
Normal file
49
utils/pdftotext.py
Normal file
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python3
|
||||
import fitz # PyMuPDF
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_pdf_to_text(input_pdf_path, output_txt_path):
    """Extract the text of every page of a PDF into a plain-text file.

    Each page is preceded by a ``--- Page N ---`` marker line.

    Args:
        input_pdf_path: Path of the PDF file to read.
        output_txt_path: Path of the text file to write (UTF-8).

    Exits the process with status 1 on a missing input file, a
    non-writable output directory, a PDF without pages, or any failure
    during extraction or writing.
    """
    # Check if the input PDF file exists.
    if not os.path.isfile(input_pdf_path):
        print(f"Error: The input PDF file '{input_pdf_path}' does not exist.")
        sys.exit(1)

    # Check if the output directory is writable (empty dirname = cwd).
    output_dir = os.path.dirname(output_txt_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        pdf_document = fitz.open(input_pdf_path)
        try:
            if pdf_document.page_count == 0:
                print("Error: The PDF file contains no pages.")
                sys.exit(1)

            # Stream page by page so the full text is never held in memory.
            with open(output_txt_path, 'w', encoding='utf-8') as txt_file:
                for page_num in range(pdf_document.page_count):
                    text = pdf_document.load_page(page_num).get_text()
                    txt_file.write(f"--- Page {page_num + 1} ---\n")
                    txt_file.write(text + "\n")
        finally:
            # Single close point covering success, early exit and errors.
            pdf_document.close()

        print(f"Text extraction complete. Check the '{output_txt_path}' file.")
    except Exception as e:
        print(f"Error during PDF to text conversion: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are required: source and destination.
    if len(sys.argv) != 3:
        print("Usage: python pdftotext.py <input_pdf_path> <output_txt_path>")
        sys.exit(1)

    input_pdf_path, output_txt_path = sys.argv[1:3]
    convert_pdf_to_text(input_pdf_path, output_txt_path)
|
||||
|
||||
42
utils/pngtojpg.py
Normal file
42
utils/pngtojpg.py
Normal file
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env python3
|
||||
from PIL import Image
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_png_to_jpg(input_path, output_path):
    """Convert a PNG image to a JPEG file.

    The image is converted to RGB mode before saving as JPEG.

    Args:
        input_path: Path of the PNG image to read.
        output_path: Path of the JPEG image to write.

    Exits the process with status 1 on a missing input file, a
    non-writable output directory, or any failure during conversion.
    """
    # Check if the input PNG file exists.
    if not os.path.isfile(input_path):
        print(f"Error: The input PNG file '{input_path}' does not exist.")
        sys.exit(1)

    # Check if the output directory is writable (empty dirname = cwd).
    output_dir = os.path.dirname(output_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        # The context manager releases the source file handle even when the
        # conversion or save step fails.
        with Image.open(input_path) as img:
            img.convert("RGB").save(output_path, "JPEG")
        print(f"Conversion complete. Check the '{output_path}' file.")
    except FileNotFoundError:
        print(f"Error: The input PNG file '{input_path}' was not found.")
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred during conversion: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are required: source and destination.
    if len(sys.argv) != 3:
        print("Usage: pngtojpg <input_png_path> <output_jpg_path>")
        sys.exit(1)

    input_path, output_path = sys.argv[1:3]
    convert_png_to_jpg(input_path, output_path)
|
||||
49
utils/texttospeech.py
Normal file
49
utils/texttospeech.py
Normal file
@@ -0,0 +1,49 @@
|
||||
from gtts import gTTS
|
||||
from pydub import AudioSegment
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_text_to_speech(input_text_path, output_audio_path, lang='en'):
    """Synthesise speech from a text file using gTTS.

    When ``output_audio_path`` ends in ``.wav`` (any letter case) the MP3
    produced by gTTS is written to a private temporary file, converted to
    WAV with pydub, and the temporary file is removed. For any other
    extension the MP3 data is written directly to ``output_audio_path``.

    Args:
        input_text_path: Path of the UTF-8 text file to read aloud.
        output_audio_path: Path of the audio file to produce.
        lang: Language code passed to gTTS (default ``'en'``).

    Exits the process with status 1 on a missing input file, a
    non-writable output directory, or any failure during synthesis.
    """
    import tempfile  # Local import: only this function needs it.

    # Check if the input text file exists.
    if not os.path.isfile(input_text_path):
        print(f"Error: The file '{input_text_path}' does not exist.")
        sys.exit(1)

    # Check if the output directory is writable (empty dirname = cwd).
    output_dir = os.path.dirname(output_audio_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    wants_wav = output_audio_path.lower().endswith('.wav')

    try:
        # Read the text file content.
        with open(input_text_path, 'r', encoding='utf-8') as file:
            text = file.read()

        tts = gTTS(text=text, lang=lang, slow=False)

        if not wants_wav:
            tts.save(output_audio_path)
            print(f"Converted '{input_text_path}' to '{output_audio_path}' as MP3 format.")
            return

        # Use a unique temporary MP3 rather than "<output>.mp3": deriving the
        # name with str.replace could rewrite other parts of the path and
        # would overwrite, then delete, a user's existing MP3 of that name.
        fd, temp_mp3_path = tempfile.mkstemp(suffix='.mp3')
        os.close(fd)
        try:
            tts.save(temp_mp3_path)
            sound = AudioSegment.from_mp3(temp_mp3_path)
            sound.export(output_audio_path, format="wav")
        finally:
            os.remove(temp_mp3_path)
        print(f"Converted '{input_text_path}' to '{output_audio_path}' as WAV format.")
    except Exception as e:
        print(f"An unexpected error occurred during text-to-speech conversion: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are required: source and destination.
    if len(sys.argv) != 3:
        print("Usage: python texttospeech.py <input_text_path> <output_audio_path>")
        sys.exit(1)

    input_text_path, output_audio_path = sys.argv[1:3]
    convert_text_to_speech(input_text_path, output_audio_path)
|
||||
|
||||
56
utils/videotoaudio.py
Normal file
56
utils/videotoaudio.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from moviepy.editor import VideoFileClip
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_video_to_audio(input_video_path, output_audio_path):
    """Extract the audio track of a video file into an MP3 or WAV file.

    The output format is chosen from the extension of
    ``output_audio_path``: ``.mp3`` uses the ``mp3`` codec and ``.wav``
    uses ``pcm_s16le``. The extension is validated before the video is
    opened, so an unsupported format fails fast.

    Args:
        input_video_path: Path of the video file to read.
        output_audio_path: Path of the audio file to write (.mp3 or .wav).

    Exits the process with status 1 on a missing input file, a
    non-writable output directory, an unsupported output extension, a
    video without audio, or any failure during extraction.
    """
    # Check if the input video file exists.
    if not os.path.isfile(input_video_path):
        print(f"Error: The file '{input_video_path}' does not exist.")
        sys.exit(1)

    # Check if the output directory is writable (empty dirname = cwd).
    output_dir = os.path.dirname(output_audio_path)
    if output_dir and not os.access(output_dir, os.W_OK):
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    # Pick the codec up front so no video is opened for a format we cannot
    # write.
    if output_audio_path.endswith('.mp3'):
        codec = 'mp3'
    elif output_audio_path.endswith('.wav'):
        codec = 'pcm_s16le'
    else:
        print("Error: Unsupported output format. Please use .mp3 or .wav")
        sys.exit(1)

    try:
        video_clip = VideoFileClip(input_video_path)
        audio_clip = None
        try:
            audio_clip = video_clip.audio
            if audio_clip is None:
                print("Error: The video file does not contain an audio track.")
                sys.exit(1)

            audio_clip.write_audiofile(output_audio_path, codec=codec)
        finally:
            # Release the clips on every path (success, early exit, error).
            if audio_clip is not None:
                audio_clip.close()
            video_clip.close()

        print(f"Conversion complete. Check the '{output_audio_path}' file.")
    except Exception as e:
        print(f"An unexpected error occurred during video-to-audio conversion: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are required: source and destination.
    if len(sys.argv) != 3:
        print("Usage: python videotoaudio.py <input_video_path> <output_audio_path>")
        sys.exit(1)

    input_video_path, output_audio_path = sys.argv[1:3]
    convert_video_to_audio(input_video_path, output_audio_path)
|
||||
41
utils/yamltojson.py
Normal file
41
utils/yamltojson.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import yaml
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
|
||||
def convert_yaml_to_json(input_yaml_path, output_json_path):
    """Load a YAML file and write its content as 4-space-indented JSON.

    Args:
        input_yaml_path: Path of the YAML file to read (UTF-8).
        output_json_path: Path of the JSON file to write (UTF-8).

    Exits the process with status 1 on a missing input file, a
    non-writable output directory, or any load/dump failure.
    """
    # Guard clause: the source must be an existing regular file.
    if not os.path.isfile(input_yaml_path):
        print(f"Error: The file '{input_yaml_path}' does not exist.")
        sys.exit(1)

    # An empty dirname means "current directory"; that case is not checked.
    output_dir = os.path.dirname(output_json_path)
    dir_is_usable = (not output_dir) or os.access(output_dir, os.W_OK)
    if not dir_is_usable:
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    try:
        # safe_load only builds plain Python objects from the YAML.
        with open(input_yaml_path, 'r', encoding='utf-8') as yaml_file:
            yaml_content = yaml.safe_load(yaml_file)

        with open(output_json_path, 'w', encoding='utf-8') as json_file:
            json.dump(yaml_content, json_file, indent=4)

        print(f"Conversion complete. Check the '{output_json_path}' file.")
    except Exception as e:
        print(f"Error during YAML to JSON conversion: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are required: source and destination.
    if len(sys.argv) != 3:
        # Script name corrected (the message previously said "yamlttojson.py").
        print("Usage: python yamltojson.py <input_yaml_path> <output_json_path>")
        sys.exit(1)

    input_yaml_path = sys.argv[1]
    output_json_path = sys.argv[2]

    convert_yaml_to_json(input_yaml_path, output_json_path)
|
||||
|
||||
68
utils/yamltomd.py
Normal file
68
utils/yamltomd.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import yaml
|
||||
import sys
|
||||
import os
|
||||
|
||||
def dict_to_markdown(data, indent=0):
    """Render parsed YAML data as a nested Markdown bullet list.

    For a mapping, every key becomes a ``- **key**`` bullet. Mapping values
    are rendered recursively two spaces deeper; list values are rendered
    four spaces deeper, with mapping items expanded recursively and other
    items shown as plain bullets; scalar values follow the key on the same
    line.

    Non-mapping input is also accepted so that any valid YAML document can
    be rendered: ``None`` (an empty document) yields an empty string, a
    list yields one bullet per item, and a scalar yields a single bullet.
    (Such input used to raise ``AttributeError``.)

    Args:
        data: The value to render (mapping, list, scalar or ``None``).
        indent: Number of leading spaces for bullets at this level.

    Returns:
        str: The Markdown text; every emitted line ends with a newline.
    """
    pad = " " * indent

    if data is None:
        return ""
    if isinstance(data, list):
        return "".join(
            dict_to_markdown(item, indent) if isinstance(item, dict)
            else f"{pad}- {item}\n"
            for item in data
        )
    if not isinstance(data, dict):
        return f"{pad}- {data}\n"

    # Collect fragments and join once instead of repeated "+=".
    parts = []
    for key, value in data.items():
        if isinstance(value, dict):
            parts.append(f"{pad}- **{key}**:\n")
            parts.append(dict_to_markdown(value, indent + 2))
        elif isinstance(value, list):
            parts.append(f"{pad}- **{key}**:\n")
            item_pad = " " * (indent + 4)
            for item in value:
                if isinstance(item, dict):
                    parts.append(dict_to_markdown(item, indent + 4))
                else:
                    parts.append(f"{item_pad}- {item}\n")
        else:
            parts.append(f"{pad}- **{key}**: {value}\n")
    return "".join(parts)
|
||||
|
||||
def convert_yaml_to_markdown(input_yaml_path, output_md_path):
    """Load a YAML file and write it out as a Markdown bullet list.

    Args:
        input_yaml_path: Path of the YAML file to read (UTF-8).
        output_md_path: Path of the Markdown file to write (UTF-8).

    Exits the process with status 1 on a missing input file, a
    non-writable output directory, a YAML parse error, or a read/write
    failure.
    """
    # Guard clause: the source must be an existing regular file.
    if not os.path.isfile(input_yaml_path):
        print(f"Error: The file '{input_yaml_path}' does not exist.")
        sys.exit(1)

    # An empty dirname means "current directory"; that case is not checked.
    output_dir = os.path.dirname(output_md_path)
    dir_is_usable = (not output_dir) or os.access(output_dir, os.W_OK)
    if not dir_is_usable:
        print(f"Error: The output directory '{output_dir}' is not writable.")
        sys.exit(1)

    # Phase 1: load the YAML; parse errors are reported separately from
    # other read errors.
    try:
        with open(input_yaml_path, 'r', encoding='utf-8') as file:
            yaml_content = yaml.safe_load(file)
    except yaml.YAMLError as e:
        print(f"Error parsing YAML file: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"Error reading YAML file: {e}")
        sys.exit(1)

    # Phase 2: render (errors here are not caught).
    markdown_content = dict_to_markdown(yaml_content)

    # Phase 3: write the Markdown file.
    try:
        with open(output_md_path, 'w', encoding='utf-8') as file:
            file.write(markdown_content)
        print(f"Conversion complete. Check the '{output_md_path}' file.")
    except PermissionError:
        print(f"Error: Permission denied when writing to '{output_md_path}'.")
        sys.exit(1)
    except Exception as e:
        print(f"Error writing Markdown file: {e}")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
    # Exactly two positional arguments are required: source and destination.
    if len(sys.argv) != 3:
        print("Usage: python yamltomd.py <input_yaml_path> <output_md_path>")
        sys.exit(1)

    input_yaml_path, output_md_path = sys.argv[1:3]
    convert_yaml_to_markdown(input_yaml_path, output_md_path)
|
||||
Reference in New Issue
Block a user