Improve the migration script, removing the need for --legacy

This commit is contained in:
Ember Hearth 2022-11-26 21:24:00 -05:00
parent bed8c1d047
commit f507de8256
3 changed files with 63 additions and 26 deletions

View file

@ -3,6 +3,6 @@
print("This script has been replaced!!") print("This script has been replaced!!")
print("Instead, please run") print("Instead, please run")
print("") print("")
print(" $ FLASK_APP=fhost flask prune --legacy") print(" $ FLASK_APP=fhost flask prune")
print("") print("")
exit(1); exit(1);

View file

@ -443,22 +443,13 @@ def ehandler(e):
return "Segmentation fault\n", e.code return "Segmentation fault\n", e.code
@app.cli.command("prune") @app.cli.command("prune")
@click.option( def prune():
'-l', '--legacy',
is_flag=True,
default=False,
help="Also look for legacy files"
)
def prune(legacy: bool):
""" """
Clean up expired files Clean up expired files
Deletes any files from the filesystem which have hit their expiration time. This Deletes any files from the filesystem which have hit their expiration time. This
doesn't remove them from the database, only from the filesystem. It's recommended doesn't remove them from the database, only from the filesystem. It's recommended
that server owners run this command regularly, or set it up on a timer. that server owners run this command regularly, or set it up on a timer.
Server owners who recently applied a migration that creates legacy files should run
this script with the --legacy/-l flag enabled for at least FHOST_MAX_EXPIRATION.
""" """
current_time = time.time() * 1000; current_time = time.time() * 1000;
@ -503,21 +494,6 @@ def prune(legacy: bool):
file.expiration = None; file.expiration = None;
db.session.commit() db.session.commit()
# Prior to 0x0 tracking file expiration times, files were removed by scanning the
# filesystem. If this system was recently migrated from the old system, there might
# still be files whose expirations aren't tracked and must be noticed the old way.
# Therefore, we perform an additional check on files in the upload directory.
if legacy:
for file in os.listdir(storage):
file_path = storage / file
stat = os.stat(file_path)
file_age = (current_time - stat.st_mtime * 1000) # How long the file has existed, in ms
max_age = get_max_lifespan(stat.st_size)
if file_age > max_age:
print(f"Removing legacy file {file_path}")
os.remove(file_path)
files_removed += 1;
print(f"\nDone! {files_removed} file(s) removed") print(f"\nDone! {files_removed} file(s) removed")
""" For a file of a given size, determine the largest allowed lifespan of that file """ For a file of a given size, determine the largest allowed lifespan of that file

View file

@ -11,12 +11,73 @@ revision = '939a08e1d6e5'
down_revision = '7e246705da6a' down_revision = '7e246705da6a'
from alembic import op from alembic import op
from flask import current_app
from flask_sqlalchemy import SQLAlchemy
from pathlib import Path
import sqlalchemy as sa import sqlalchemy as sa
import os
import time
""" For a file of a given size, determine the largest allowed lifespan of that file
Based on the current app's configuration: Specifically, the MAX_CONTENT_LENGTH, as well
as FHOST_{MIN,MAX}_EXPIRATION.
This lifespan may be shortened by a user's request, but no files should be allowed to
expire at a point after this number.
Value returned is a duration in milliseconds.
"""
def get_max_lifespan(filesize: int) -> int:
    """Return the longest lifespan, in milliseconds, allowed for a file of this size.

    The result follows a cubic curve between the configured bounds: a file of
    size 0 gets FHOST_MAX_EXPIRATION, and a file of exactly MAX_CONTENT_LENGTH
    bytes gets FHOST_MIN_EXPIRATION.

    Args:
        filesize: Size of the file in bytes.

    Returns:
        The maximum allowed lifespan as a duration in milliseconds.
    """
    config = current_app.config
    min_exp = config.get("FHOST_MIN_EXPIRATION", 30 * 24 * 60 * 60 * 1000)
    max_exp = config.get("FHOST_MAX_EXPIRATION", 365 * 24 * 60 * 60 * 1000)
    # Flask's default configuration already contains MAX_CONTENT_LENGTH=None,
    # so a plain .get() default would never apply and the division below
    # would fail on None. Fall back explicitly on any falsy value instead.
    max_size = config.get("MAX_CONTENT_LENGTH") or 256 * 1024 * 1024
    return min_exp + int((-max_exp + min_exp) * (filesize / max_size - 1) ** 3)
# SQLAlchemy handle that the File model in this migration is declared against.
# NOTE(review): current_app.__weakref__() presumably unwraps the current_app
# proxy to the underlying Flask application object — confirm.
db = SQLAlchemy(current_app.__weakref__())
# Representations of the original and updated File tables
class File(db.Model):
    """File table as it looks before this migration, i.e. without `expiration`."""

    id = db.Column(db.Integer, primary_key=True)
    sha256 = db.Column(db.String, unique=True)
    ext = db.Column(db.UnicodeText)
    mime = db.Column(db.UnicodeText)
    addr = db.Column(db.UnicodeText)
    removed = db.Column(db.Boolean, default=False)
    nsfw_score = db.Column(db.Float)
# Lightweight description of the 'file' table including the new 'expiration'
# column; upgrade() builds its UPDATE statements against it.
UpdatedFile = sa.table('file',
    # We only need to describe the columns that are relevant to us
    sa.column('id', db.Integer),
    sa.column('expiration', db.BigInteger)
)
def upgrade():
    """Add the `expiration` column to `file` and backfill it from the files on disk.

    For every row that is not marked removed and whose sha256 names an entry in
    FHOST_STORAGE_PATH, the expiration is set to the stored file's mtime plus
    the maximum lifespan allowed for its size, in milliseconds. Rows without a
    matching entry on disk are left untouched.
    """
    op.add_column('file', sa.Column('expiration', sa.BigInteger()))

    storage = Path(current_app.config["FHOST_STORAGE_PATH"])

    # List of file hashes which have not expired yet
    # This could get really big for some servers
    try:
        unexpired_files = set(os.listdir(storage))
    except FileNotFoundError:
        # No storage directory means nothing is stored on disk, so there is
        # nothing to backfill; the new column has already been added above.
        return

    # Calculate an expiration date for all existing files
    files = File.query\
        .where(
            sa.not_(File.removed)
        ).all()

    for file in files:
        if file.sha256 not in unexpired_files:
            continue

        file_path = storage / file.sha256
        stat = os.stat(file_path)
        max_age = get_max_lifespan(stat.st_size)  # How long the file is allowed to live, in ms
        file_birth = stat.st_mtime * 1000  # When the file was created, in ms
        op.execute(
            sa.update(UpdatedFile)
            .where(UpdatedFile.c.id == file.id)
            .values({'expiration': int(file_birth + max_age)})
        )
def downgrade():
    """Revert this migration by dropping the `expiration` column from `file`."""
    op.drop_column('file', 'expiration')