Added session deletion after 15 minutes
This commit is contained in:
18
app/app.py
18
app/app.py
@@ -2,6 +2,7 @@ import os
|
|||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import uuid
|
import uuid
|
||||||
|
import threading
|
||||||
from flask import Flask, request, render_template, send_file, jsonify
|
from flask import Flask, request, render_template, send_file, jsonify
|
||||||
from werkzeug.utils import secure_filename
|
from werkzeug.utils import secure_filename
|
||||||
|
|
||||||
@@ -12,7 +13,6 @@ BASE_OUTPUT_FOLDER = 'output'
|
|||||||
os.makedirs(BASE_UPLOAD_FOLDER, exist_ok=True)
|
os.makedirs(BASE_UPLOAD_FOLDER, exist_ok=True)
|
||||||
os.makedirs(BASE_OUTPUT_FOLDER, exist_ok=True)
|
os.makedirs(BASE_OUTPUT_FOLDER, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def cleanup_session_folders(session_id):
|
def cleanup_session_folders(session_id):
|
||||||
"""Remove any existing session folders to ensure a clean slate."""
|
"""Remove any existing session folders to ensure a clean slate."""
|
||||||
upload_folder = os.path.join(BASE_UPLOAD_FOLDER, session_id)
|
upload_folder = os.path.join(BASE_UPLOAD_FOLDER, session_id)
|
||||||
@@ -22,6 +22,12 @@ def cleanup_session_folders(session_id):
|
|||||||
if os.path.exists(output_folder):
|
if os.path.exists(output_folder):
|
||||||
shutil.rmtree(output_folder)
|
shutil.rmtree(output_folder)
|
||||||
|
|
||||||
|
# Pending cleanup timers, keyed by session id. The lock guards the dict because
# timers fire on their own threads while callers keep scheduling new ones.
_cleanup_timers = {}
_cleanup_timers_lock = threading.Lock()


def schedule_cleanup(session_id, delay=900):
    """
    Schedule the cleanup of a session's folders after a specified delay.

    Scheduling again for the same session replaces the pending timer: the
    countdown restarts, and a timer left over from an earlier call can no
    longer delete folders that a later call is still counting down for.

    Args:
        session_id: Identifier passed to cleanup_session_folders when the
            timer fires.
        delay: Seconds to wait before cleaning up (default: 900 seconds =
            15 minutes).

    Returns:
        The started threading.Timer responsible for this session's cleanup.
    """
    def _fire():
        with _cleanup_timers_lock:
            # A newer call may have replaced this timer after it already woke
            # up (cancel() cannot stop a timer that has begun firing), so only
            # the currently registered timer is allowed to clean up.
            if _cleanup_timers.get(session_id) is not timer:
                return
            del _cleanup_timers[session_id]
        cleanup_session_folders(session_id)

    timer = threading.Timer(delay, _fire)
    with _cleanup_timers_lock:
        superseded = _cleanup_timers.pop(session_id, None)
        if superseded is not None:
            superseded.cancel()
        # Register before starting so _fire always finds its own entry.
        _cleanup_timers[session_id] = timer
    timer.start()
    return timer
|
||||||
|
|
||||||
def create_session_folders(session_id):
|
def create_session_folders(session_id):
|
||||||
"""
|
"""
|
||||||
@@ -35,7 +41,6 @@ def create_session_folders(session_id):
|
|||||||
os.makedirs(output_folder, exist_ok=True)
|
os.makedirs(output_folder, exist_ok=True)
|
||||||
return upload_folder, output_folder
|
return upload_folder, output_folder
|
||||||
|
|
||||||
|
|
||||||
def run_ocrmypdf(input_pdf, output_pdf):
|
def run_ocrmypdf(input_pdf, output_pdf):
|
||||||
cmd = [
|
cmd = [
|
||||||
'ocrmypdf',
|
'ocrmypdf',
|
||||||
@@ -53,14 +58,12 @@ def run_ocrmypdf(input_pdf, output_pdf):
|
|||||||
else:
|
else:
|
||||||
raise RuntimeError(f"OCRmyPDF failed: {result.stderr}")
|
raise RuntimeError(f"OCRmyPDF failed: {result.stderr}")
|
||||||
|
|
||||||
|
|
||||||
@app.route('/')
def index():
    """Render the landing page, handing it a freshly generated session id."""
    # Every page load gets its own random UUID4 string as the session id.
    return render_template('index.html', session_id=str(uuid.uuid4()))
|
||||||
|
|
||||||
|
|
||||||
@app.route('/upload/<session_id>', methods=['POST'])
|
@app.route('/upload/<session_id>', methods=['POST'])
|
||||||
def upload_files(session_id):
|
def upload_files(session_id):
|
||||||
# Create fresh session folders, clearing any previous data.
|
# Create fresh session folders, clearing any previous data.
|
||||||
@@ -94,6 +97,7 @@ def upload_files(session_id):
|
|||||||
return jsonify({'error': f"Failed to process {filename}: {str(e)}"}), 500
|
return jsonify({'error': f"Failed to process {filename}: {str(e)}"}), 500
|
||||||
|
|
||||||
if not processed_files:
|
if not processed_files:
|
||||||
|
schedule_cleanup(session_id)
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'error': 'All files were skipped because they already contain selectable text.',
|
'error': 'All files were skipped because they already contain selectable text.',
|
||||||
'skipped_files': skipped_files
|
'skipped_files': skipped_files
|
||||||
@@ -103,13 +107,15 @@ def upload_files(session_id):
|
|||||||
zip_filename = os.path.join(output_folder, 'processed_files.zip')
|
zip_filename = os.path.join(output_folder, 'processed_files.zip')
|
||||||
subprocess.run(['zip', '-j', zip_filename] + [os.path.join(output_folder, f) for f in processed_files])
|
subprocess.run(['zip', '-j', zip_filename] + [os.path.join(output_folder, f) for f in processed_files])
|
||||||
|
|
||||||
|
# Schedule deletion of the session folders after 15 minutes.
|
||||||
|
schedule_cleanup(session_id)
|
||||||
|
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'download_url': f'/download/{session_id}/processed_files.zip',
|
'download_url': f'/download/{session_id}/processed_files.zip',
|
||||||
'processed_files': processed_files,
|
'processed_files': processed_files,
|
||||||
'skipped_files': skipped_files
|
'skipped_files': skipped_files
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
@app.route('/download/<session_id>/<filename>')
|
@app.route('/download/<session_id>/<filename>')
|
||||||
def download_file(session_id, filename):
|
def download_file(session_id, filename):
|
||||||
file_path = os.path.join(BASE_OUTPUT_FOLDER, session_id, filename)
|
file_path = os.path.join(BASE_OUTPUT_FOLDER, session_id, filename)
|
||||||
@@ -117,4 +123,4 @@ def download_file(session_id, filename):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Direct execution only: listen on all interfaces, port 5000, debugger off.
    app.run(
        host='0.0.0.0',
        port=5000,
        debug=False,
    )
|
||||||
Reference in New Issue
Block a user