Added session deletion after 15 min

This commit is contained in:
2025-03-26 18:40:27 +11:00
parent 7ce2ac6d08
commit d5fa05dc09

View File

@@ -2,6 +2,7 @@ import os
import shutil import shutil
import subprocess import subprocess
import uuid import uuid
import threading
from flask import Flask, request, render_template, send_file, jsonify from flask import Flask, request, render_template, send_file, jsonify
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
@@ -12,7 +13,6 @@ BASE_OUTPUT_FOLDER = 'output'
os.makedirs(BASE_UPLOAD_FOLDER, exist_ok=True) os.makedirs(BASE_UPLOAD_FOLDER, exist_ok=True)
os.makedirs(BASE_OUTPUT_FOLDER, exist_ok=True) os.makedirs(BASE_OUTPUT_FOLDER, exist_ok=True)
def cleanup_session_folders(session_id): def cleanup_session_folders(session_id):
"""Remove any existing session folders to ensure a clean slate.""" """Remove any existing session folders to ensure a clean slate."""
upload_folder = os.path.join(BASE_UPLOAD_FOLDER, session_id) upload_folder = os.path.join(BASE_UPLOAD_FOLDER, session_id)
@@ -22,6 +22,12 @@ def cleanup_session_folders(session_id):
if os.path.exists(output_folder): if os.path.exists(output_folder):
shutil.rmtree(output_folder) shutil.rmtree(output_folder)
# Pending cleanup timers keyed by session id, so that rescheduling a session
# replaces its earlier timer instead of stacking a second one on top of it.
_cleanup_timers = {}
_cleanup_timers_lock = threading.Lock()


def schedule_cleanup(session_id, delay=900):
    """
    Schedule the cleanup of a session's folders after a specified delay.

    If a cleanup is already pending for the same session, it is cancelled and
    replaced, so the folders always live for ``delay`` seconds counted from
    the most recent call. Without this, a timer left over from an earlier
    upload would delete the files of a later upload ahead of time.

    Args:
        session_id: Identifier of the session whose folders should be removed.
        delay: Seconds to wait before cleaning up (default: 900 = 15 minutes).

    Returns:
        The started ``threading.Timer``; call ``cancel()`` on it to abort the
        pending cleanup.

    Note:
        Timers run as daemon threads so that a pending cleanup never keeps the
        process alive after the server has been stopped. Folders whose timer
        has not fired by then are left on disk.
    """
    def _expire():
        with _cleanup_timers_lock:
            # A newer call for this session took over the registry slot:
            # leave the folders alone, the newer timer owns them now.
            if _cleanup_timers.get(session_id) is not timer:
                return
            del _cleanup_timers[session_id]
        cleanup_session_folders(session_id)

    timer = threading.Timer(delay, _expire)
    timer.daemon = True

    with _cleanup_timers_lock:
        previous = _cleanup_timers.get(session_id)
        _cleanup_timers[session_id] = timer

    if previous is not None:
        previous.cancel()

    timer.start()
    return timer
def create_session_folders(session_id): def create_session_folders(session_id):
""" """
@@ -35,7 +41,6 @@ def create_session_folders(session_id):
os.makedirs(output_folder, exist_ok=True) os.makedirs(output_folder, exist_ok=True)
return upload_folder, output_folder return upload_folder, output_folder
def run_ocrmypdf(input_pdf, output_pdf): def run_ocrmypdf(input_pdf, output_pdf):
cmd = [ cmd = [
'ocrmypdf', 'ocrmypdf',
@@ -53,14 +58,12 @@ def run_ocrmypdf(input_pdf, output_pdf):
else: else:
raise RuntimeError(f"OCRmyPDF failed: {result.stderr}") raise RuntimeError(f"OCRmyPDF failed: {result.stderr}")
@app.route('/')
def index():
    """Render the landing page, handing it a newly generated session id."""
    # A fresh UUID4 string is minted on every request to this route.
    return render_template('index.html', session_id=str(uuid.uuid4()))
@app.route('/upload/<session_id>', methods=['POST']) @app.route('/upload/<session_id>', methods=['POST'])
def upload_files(session_id): def upload_files(session_id):
# Create fresh session folders, clearing any previous data. # Create fresh session folders, clearing any previous data.
@@ -94,6 +97,7 @@ def upload_files(session_id):
return jsonify({'error': f"Failed to process {filename}: {str(e)}"}), 500 return jsonify({'error': f"Failed to process {filename}: {str(e)}"}), 500
if not processed_files: if not processed_files:
schedule_cleanup(session_id)
return jsonify({ return jsonify({
'error': 'All files were skipped because they already contain selectable text.', 'error': 'All files were skipped because they already contain selectable text.',
'skipped_files': skipped_files 'skipped_files': skipped_files
@@ -103,13 +107,15 @@ def upload_files(session_id):
zip_filename = os.path.join(output_folder, 'processed_files.zip') zip_filename = os.path.join(output_folder, 'processed_files.zip')
subprocess.run(['zip', '-j', zip_filename] + [os.path.join(output_folder, f) for f in processed_files]) subprocess.run(['zip', '-j', zip_filename] + [os.path.join(output_folder, f) for f in processed_files])
# Schedule deletion of the session folders after 15 minutes.
schedule_cleanup(session_id)
return jsonify({ return jsonify({
'download_url': f'/download/{session_id}/processed_files.zip', 'download_url': f'/download/{session_id}/processed_files.zip',
'processed_files': processed_files, 'processed_files': processed_files,
'skipped_files': skipped_files 'skipped_files': skipped_files
}) })
@app.route('/download/<session_id>/<filename>') @app.route('/download/<session_id>/<filename>')
def download_file(session_id, filename): def download_file(session_id, filename):
file_path = os.path.join(BASE_OUTPUT_FOLDER, session_id, filename) file_path = os.path.join(BASE_OUTPUT_FOLDER, session_id, filename)
@@ -117,4 +123,4 @@ def download_file(session_id, filename):
if __name__ == '__main__': if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=False) app.run(host='0.0.0.0', port=5000, debug=False)