From f507de825626d5e4f40e3bbbfb17bba83314de17 Mon Sep 17 00:00:00 2001 From: Ember Hearth Date: Sat, 26 Nov 2022 21:24:00 -0500 Subject: [PATCH] Improve the migration script, removing the need for --legacy --- cleanup.py | 2 +- fhost.py | 26 +----------- migrations/versions/939a08e1d6e5_.py | 61 ++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 26 deletions(-) diff --git a/cleanup.py b/cleanup.py index cc998df..14fbc61 100755 --- a/cleanup.py +++ b/cleanup.py @@ -3,6 +3,6 @@ print("This script has been replaced!!") print("Instead, please run") print("") -print(" $ FLASK_APP=fhost flask prune --legacy") +print(" $ FLASK_APP=fhost flask prune") print("") exit(1); diff --git a/fhost.py b/fhost.py index a908993..469669a 100755 --- a/fhost.py +++ b/fhost.py @@ -443,22 +443,13 @@ def ehandler(e): return "Segmentation fault\n", e.code @app.cli.command("prune") -@click.option( - '-l', '--legacy', - is_flag=True, - default=False, - help="Also look for legacy files" -) -def prune(legacy: bool): +def prune(): """ Clean up expired files Deletes any files from the filesystem which have hit their expiration time. This doesn't remove them from the database, only from the filesystem. It's recommended that server owners run this command regularly, or set it up on a timer. - - Server owners who recently applied a migration that creates legacy files should run - this script with the --legacy/-l flag enabled for at least FHOST_MAX_EXPIRATION. """ current_time = time.time() * 1000; @@ -503,21 +494,6 @@ def prune(legacy: bool): file.expiration = None; db.session.commit() - # Prior to 0x0 tracking file expiration times, files were removed by scanning the - # filesystem. If this system was recently migrated from the old system, there might - # still be files whose expirations aren't tracked and must be noticed the old way. - # Therefore, we perform an additional check on files in the upload directory. 
- if legacy: - for file in os.listdir(storage): - file_path = storage / file - stat = os.stat(file_path) - file_age = (current_time - stat.st_mtime * 1000) # How long the file has existed, in ms - max_age = get_max_lifespan(stat.st_size) - if file_age > max_age: - print(f"Removing legacy file {file_path}") - os.remove(file_path) - files_removed += 1; - print(f"\nDone! {files_removed} file(s) removed") """ For a file of a given size, determine the largest allowed lifespan of that file diff --git a/migrations/versions/939a08e1d6e5_.py b/migrations/versions/939a08e1d6e5_.py index 8a47428..3b9b418 100644 --- a/migrations/versions/939a08e1d6e5_.py +++ b/migrations/versions/939a08e1d6e5_.py @@ -11,12 +11,73 @@ revision = '939a08e1d6e5' down_revision = '7e246705da6a' from alembic import op +from flask import current_app +from flask_sqlalchemy import SQLAlchemy +from pathlib import Path import sqlalchemy as sa +import os +import time + +""" For a file of a given size, determine the largest allowed lifespan of that file + +Based on the current app's configuration: Specifically, the MAX_CONTENT_LENGTH, as well +as FHOST_{MIN,MAX}_EXPIRATION. + +This lifespan may be shortened by a user's request, but no files should be allowed to +expire at a point after this number. + +Value returned is a duration in milliseconds. 
+""" +def get_max_lifespan(filesize: int) -> int: + min_exp = current_app.config.get("FHOST_MIN_EXPIRATION", 30 * 24 * 60 * 60 * 1000) + max_exp = current_app.config.get("FHOST_MAX_EXPIRATION", 365 * 24 * 60 * 60 * 1000) + max_size = current_app.config.get("MAX_CONTENT_LENGTH", 256 * 1024 * 1024) + return min_exp + int((-max_exp + min_exp) * (filesize / max_size - 1) ** 3) + +db = SQLAlchemy(current_app.__weakref__()) + +# Representations of the original and updated File tables +class File(db.Model): + id = db.Column(db.Integer, primary_key = True) + sha256 = db.Column(db.String, unique = True) + ext = db.Column(db.UnicodeText) + mime = db.Column(db.UnicodeText) + addr = db.Column(db.UnicodeText) + removed = db.Column(db.Boolean, default=False) + nsfw_score = db.Column(db.Float) +UpdatedFile = sa.table('file', + # We only need to describe the columns that are relevant to us + sa.column('id', db.Integer), + sa.column('expiration', db.BigInteger) +) def upgrade(): op.add_column('file', sa.Column('expiration', sa.BigInteger())) + storage = Path(current_app.config["FHOST_STORAGE_PATH"]) + current_time = time.time() * 1000; + + # List of file hashes which have not expired yet + # This could get really big for some servers + unexpired_files = set(os.listdir(storage)) + + # Calculate an expiration date for all existing files + files = File.query\ + .where( + sa.not_(File.removed) + ).all() + for file in files: + if file.sha256 in unexpired_files: + file_path = storage / file.sha256 + stat = os.stat(file_path) + max_age = get_max_lifespan(stat.st_size) # How long the file is allowed to live, in ms + file_birth = stat.st_mtime * 1000 # When the file was created, in ms + op.execute( + sa.update(UpdatedFile) + .where(UpdatedFile.c.id == file.id) + .values({'expiration': int(file_birth + max_age)}) + ) def downgrade(): op.drop_column('file', 'expiration')