From 6d036eeb7d5026989a81fd857269f14da18e9adb Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Tue, 22 Nov 2022 16:15:50 -0500 Subject: [PATCH 01/13] Add support for expiring files SUPPLEMENTALLY: - Add an `expiration` field to the `file` table of the database - Produce a migration for the above change - Overhaul the cleanup script, and integrate into fhost.py (now run using FLASK_APP=fhost flask prune) - Replace the old cleanup script with a deprecation notice - Add information about how to expire files to the index - Update the README with information about the new script --- README.rst | 4 +- cleanup.py | 48 +------ fhost.py | 187 +++++++++++++++++++++++++-- migrations/versions/939a08e1d6e5_.py | 22 ++++ templates/index.html | 6 + 5 files changed, 215 insertions(+), 52 deletions(-) create mode 100644 migrations/versions/939a08e1d6e5_.py diff --git a/README.rst b/README.rst index f5270e4..b123e32 100644 --- a/README.rst +++ b/README.rst @@ -27,8 +27,8 @@ For all other servers, set ``FHOST_USE_X_ACCEL_REDIRECT`` to ``False`` and Otherwise, Flask will serve the file with chunked encoding, which sucks and should be avoided at all costs. -To make files expire, simply create a cronjob that runs ``cleanup.py`` every -now and then. +To make files expire, simply create a cronjob that runs ``FLASK_APP=fhost +flask prune`` every now and then. Before running the service for the first time, run ``FLASK_APP=fhost flask db upgrade``. diff --git a/cleanup.py b/cleanup.py index 0f9a5ce..cc998df 100755 --- a/cleanup.py +++ b/cleanup.py @@ -1,44 +1,8 @@ #!/usr/bin/env python3 -""" - Copyright © 2020 Mia Herkt - Licensed under the EUPL, Version 1.2 or - as soon as approved - by the European Commission - subsequent versions of the EUPL - (the "License"); - You may not use this work except in compliance with the License. 
- You may obtain a copy of the license at: - - https://joinup.ec.europa.eu/software/page/eupl - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - either express or implied. - See the License for the specific language governing permissions - and limitations under the License. -""" - -import os -import sys -import time -import datetime -from fhost import app - -os.chdir(os.path.dirname(sys.argv[0])) -os.chdir(app.config["FHOST_STORAGE_PATH"]) - -files = [f for f in os.listdir(".")] - -maxs = app.config["MAX_CONTENT_LENGTH"] -mind = 30 -maxd = 365 - -for f in files: - stat = os.stat(f) - systime = time.time() - age = datetime.timedelta(seconds=(systime - stat.st_mtime)).days - - maxage = mind + (-maxd + mind) * (stat.st_size / maxs - 1) ** 3 - - if age >= maxage: - os.remove(f) +print("This script has been replaced!!") +print("Instead, please run") +print("") +print(" $ FLASK_APP=fhost flask prune --legacy") +print("") +exit(1); diff --git a/fhost.py b/fhost.py index 9c2b94b..a908993 100755 --- a/fhost.py +++ b/fhost.py @@ -22,12 +22,17 @@ from flask import Flask, abort, make_response, redirect, request, send_from_directory, url_for, Response, render_template from flask_sqlalchemy import SQLAlchemy from flask_migrate import Migrate +from sqlalchemy import and_ from jinja2.exceptions import * from jinja2 import ChoiceLoader, FileSystemLoader from hashlib import sha256 from magic import Magic from mimetypes import guess_extension +import click +import os import sys +import time +import typing import requests from validators import url as url_valid from pathlib import Path @@ -121,12 +126,14 @@ class File(db.Model): addr = db.Column(db.UnicodeText) removed = db.Column(db.Boolean, default=False) nsfw_score = db.Column(db.Float) + expiration = db.Column(db.BigInteger) - def __init__(self, sha256, ext, mime, addr): + def __init__(self, 
sha256, ext, mime, addr, expiration): self.sha256 = sha256 self.ext = ext self.mime = mime self.addr = addr + self.expiration = expiration def getname(self): return u"{0}{1}".format(su.enbase(self.id), self.ext) @@ -139,7 +146,16 @@ class File(db.Model): else: return url_for("get", path=n, _external=True) + "\n" - def store(file_, addr): + """ + requested_expiration can be: + - None, to use the longest allowed file lifespan + - a duration (in hours) that the file should live for + - a timestamp in epoch millis that the file should expire at + + Any value greater than the longest allowed file lifespan will be rounded down to that + value. + """ + def store(file_, requested_expiration: typing.Optional[int], addr): data = file_.read() digest = sha256(data).hexdigest() @@ -175,15 +191,51 @@ class File(db.Model): return ext[:app.config["FHOST_MAX_EXT_LENGTH"]] or ".bin" - f = File.query.filter_by(sha256=digest).first() + # Returns the epoch millisecond at which this file should expire + # + # Uses the expiration time provided by the user (requested_expiration) + # upper-bounded by an algorithm that computes the maximum lifespan based on the size of the + # file. + # + # That is, all files are assigned a computed expiration, which can be voluntarily + # shortened by the user either by providing a timestamp in epoch millis or a + # duration in hours. 
+ def get_expiration() -> int: + current_epoch_millis = time.time() * 1000; + # Maximum lifetime of the file in milliseconds + this_files_max_lifespan = get_max_lifespan(len(data)); + + # The latest allowed expiration date for this file, in epoch millis + this_files_max_expiration = this_files_max_lifespan + 1000 * time.time(); + + if requested_expiration is None: + return this_files_max_expiration + elif requested_expiration < 1650460320000: + # Treat the requested expiration time as a duration in hours + requested_expiration_ms = requested_expiration * 60 * 60 * 1000 + return min(this_files_max_expiration, current_epoch_millis + requested_expiration_ms) + else: + # Treat the requested expiration time as a timestamp in epoch millis + return min(this_files_max_expiration, requested_expiration); + + f = File.query.filter_by(sha256=digest).first() if f: + # If the file already exists if f.removed: + # The file was removed by moderation, so don't accept it back abort(451) + if f.expiration is None: + # The file has expired, so give it a new expiration date + f.expiration = get_expiration() + else: + # The file already exists, update the expiration if needed + f.expiration = max(f.expiration, get_expiration()) else: mime = get_mime() ext = get_ext(mime) - f = File(digest, ext, mime, addr) + expiration = get_expiration() + f = File(digest, ext, mime, addr, expiration) f.addr = addr @@ -260,11 +312,20 @@ def in_upload_bl(addr): return False -def store_file(f, addr): +""" +requested_expiration can be: + - None, to use the longest allowed file lifespan + - a duration (in hours) that the file should live for + - a timestamp in epoch millis that the file should expire at + +Any value greater than the longest allowed file lifespan will be rounded down to that +value. 
+""" +def store_file(f, requested_expiration: typing.Optional[int], addr): if in_upload_bl(addr): return "Your host is blocked from uploading files.\n", 451 - sf = File.store(f, addr) + sf = File.store(f, requested_expiration, addr) return sf.geturl() @@ -289,7 +350,7 @@ def store_url(url, addr): f = urlfile(read=r.raw.read, content_type=r.headers["content-type"], filename="") - return store_file(f, addr) + return store_file(f, None, addr) else: abort(413) else: @@ -336,7 +397,23 @@ def fhost(): sf = None if "file" in request.files: - return store_file(request.files["file"], request.remote_addr) + try: + # Store the file with the requested expiration date + return store_file( + request.files["file"], + int(request.form["expires"]), + request.remote_addr + ) + except ValueError: + # The requested expiration date wasn't properly formed + abort(400) + except KeyError: + # No expiration date was requested, store with the max lifespan + return store_file( + request.files["file"], + None, + request.remote_addr + ) elif "url" in request.form: return store_url(request.form["url"], request.remote_addr) elif "shorten" in request.form: @@ -364,3 +441,97 @@ def ehandler(e): return render_template(f"{e.code}.html", id=id), e.code except TemplateNotFound: return "Segmentation fault\n", e.code + +@app.cli.command("prune") +@click.option( + '-l', '--legacy', + is_flag=True, + default=False, + help="Also look for legacy files" +) +def prune(legacy: bool): + """ + Clean up expired files + + Deletes any files from the filesystem which have hit their expiration time. This + doesn't remove them from the database, only from the filesystem. It's recommended + that server owners run this command regularly, or set it up on a timer. + + Server owners who recently applied a migration that creates legacy files should run + this script with the --legacy/-l flag enabled for at least FHOST_MAX_EXPIRATION. 
+ """ + current_time = time.time() * 1000; + + # The path to where uploaded files are stored + storage = Path(app.config["FHOST_STORAGE_PATH"]) + + # A list of all files who've passed their expiration times + expired_files = File.query\ + .where( + and_( + File.expiration.is_not(None), + File.expiration < current_time + ) + ).all() + + files_removed = 0; + + # For every expired file... + for file in expired_files: + # Log the file we're about to remove + file_name = file.getname() + file_hash = file.sha256 + file_path = storage / file_hash + print(f"Removing expired file {file_name} [{file_hash}]") + + # Remove it from the file system + try: + os.remove(file_path) + files_removed += 1; + except FileNotFoundError: + pass # If the file was already gone, we're good + except OSError as e: + print(e) + print( + "\n------------------------------------" + "Encountered an error while trying to remove file {file_path}. Double" + "check to make sure the server is configured correctly, permissions are" + "okay, and everything is ship shape, then try again.") + return; + + # Finally, mark that the file was removed + file.expiration = None; + db.session.commit() + + # Prior to 0x0 tracking file expiration times, files were removed by scanning the + # filesystem. If this system was recently migrated from the old system, there might + # still be files whose expirations aren't tracked and must be noticed the old way. + # Therefore, we perform an additional check on files in the upload directory. + if legacy: + for file in os.listdir(storage): + file_path = storage / file + stat = os.stat(file_path) + file_age = (current_time - stat.st_mtime * 1000) # How long the file has existed, in ms + max_age = get_max_lifespan(stat.st_size) + if file_age > max_age: + print(f"Removing legacy file {file_path}") + os.remove(file_path) + files_removed += 1; + + print(f"\nDone! 
{files_removed} file(s) removed") + +""" For a file of a given size, determine the largest allowed lifespan of that file + +Based on the current app's configuration: Specifically, the MAX_CONTENT_LENGTH, as well +as FHOST_{MIN,MAX}_EXPIRATION. + +This lifespan may be shortened by a user's request, but no files should be allowed to +expire at a point after this number. + +Value returned is a duration in milliseconds. +""" +def get_max_lifespan(filesize: int) -> int: + min_exp = app.config.get("FHOST_MIN_EXPIRATION", 30 * 24 * 60 * 60 * 1000) + max_exp = app.config.get("FHOST_MAX_EXPIRATION", 365 * 24 * 60 * 60 * 1000) + max_size = app.config.get("MAX_CONTENT_LENGTH", 256 * 1024 * 1024) + return min_exp + int((-max_exp + min_exp) * (filesize / max_size - 1) ** 3) diff --git a/migrations/versions/939a08e1d6e5_.py b/migrations/versions/939a08e1d6e5_.py new file mode 100644 index 0000000..8a47428 --- /dev/null +++ b/migrations/versions/939a08e1d6e5_.py @@ -0,0 +1,22 @@ +"""add file expirations [creates legacy files] + +Revision ID: 939a08e1d6e5 +Revises: 7e246705da6a +Create Date: 2022-11-22 12:16:32.517184 + +""" + +# revision identifiers, used by Alembic. +revision = '939a08e1d6e5' +down_revision = '7e246705da6a' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + op.add_column('file', sa.Column('expiration', sa.BigInteger())) + + +def downgrade(): + op.drop_column('file', 'expiration') diff --git a/templates/index.html b/templates/index.html index cef9de2..b115d52 100644 --- a/templates/index.html +++ b/templates/index.html @@ -11,6 +11,12 @@ Or you can shorten URLs: File URLs are valid for at least 30 days and up to a year (see below). Shortened URLs do not expire. 
+ +Files can be set to expire sooner by adding an "expires" parameter (in hours) + curl -F'file=@yourfile.png' -F'expires=24' {{ fhost_url }} +OR by setting "expires" to a timestamp in epoch milliseconds + curl -F'file=@yourfile.png' -F'expires=1681996320000' {{ fhost_url }} + {% set max_size = config["MAX_CONTENT_LENGTH"]|filesizeformat(True) %} Maximum file size: {{ max_size }} Not allowed: {{ config["FHOST_MIME_BLACKLIST"]|join(", ") }} -- 2.48.1 From 78b8a73ea5883f66ce0433e3b7461054998f7f6a Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Tue, 22 Nov 2022 16:51:54 -0500 Subject: [PATCH 02/13] Add a note explaining that expired files aren't immediately removed --- templates/index.html | 3 +++ 1 file changed, 3 insertions(+) diff --git a/templates/index.html b/templates/index.html index b115d52..8b8c7c5 100644 --- a/templates/index.html +++ b/templates/index.html @@ -17,6 +17,9 @@ Files can be set to expire sooner by adding an "expires" parameter (in hours) OR by setting "expires" to a timestamp in epoch milliseconds curl -F'file=@yourfile.png' -F'expires=1681996320000' {{ fhost_url }} +Expired files won't be removed immediately, but will be removed as part of +the next purge. 
+ {% set max_size = config["MAX_CONTENT_LENGTH"]|filesizeformat(True) %} Maximum file size: {{ max_size }} Not allowed: {{ config["FHOST_MIME_BLACKLIST"]|join(", ") }} -- 2.48.1 From bed8c1d047faa84faa261aa58c11e8b431b55eed Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Tue, 22 Nov 2022 17:16:38 -0500 Subject: [PATCH 03/13] Show correct times on the index page graph --- templates/index.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/templates/index.html b/templates/index.html index 8b8c7c5..6f84f59 100644 --- a/templates/index.html +++ b/templates/index.html @@ -31,7 +31,7 @@ FILE RETENTION PERIOD retention = min_age + (-max_age + min_age) * pow((file_size / max_size - 1), 3) days - 365 | \\ + {{'{: 6}'.format(config.get("FHOST_MAX_EXPIRATION", 31536000000)//86400000)}} | \\ | \\ | \\ | \\ @@ -39,7 +39,7 @@ retention = min_age + (-max_age + min_age) * pow((file_size / max_size - 1), 3) | \\ | .. | \\ - 197.5 | ----------..------------------------------------------- + {{'{: 6.1f}'.format((config.get("FHOST_MIN_EXPIRATION", 2592000000)/2 + config.get("FHOST_MAX_EXPIRATION", 31536000000)/2)/86400000)}} | ----------..------------------------------------------- | .. | \\ | .. @@ -48,7 +48,7 @@ retention = min_age + (-max_age + min_age) * pow((file_size / max_size - 1), 3) | ... | .... | ...... - 30 | .................... + {{'{: 6}'.format(config.get("FHOST_MIN_EXPIRATION", 2592000000)//86400000)}} | .................... 
0{{ ((config["MAX_CONTENT_LENGTH"]/2)|filesizeformat(True)).split(" ")[0].rjust(27) }}{{ max_size.split(" ")[0].rjust(27) }} {{ max_size.split(" ")[1].rjust(54) }} -- 2.48.1 From f507de825626d5e4f40e3bbbfb17bba83314de17 Mon Sep 17 00:00:00 2001 From: Ember Hearth Date: Sat, 26 Nov 2022 21:24:00 -0500 Subject: [PATCH 04/13] Improve the migration script, removing the need for --legacy --- cleanup.py | 2 +- fhost.py | 26 +----------- migrations/versions/939a08e1d6e5_.py | 61 ++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 26 deletions(-) diff --git a/cleanup.py b/cleanup.py index cc998df..14fbc61 100755 --- a/cleanup.py +++ b/cleanup.py @@ -3,6 +3,6 @@ print("This script has been replaced!!") print("Instead, please run") print("") -print(" $ FLASK_APP=fhost flask prune --legacy") +print(" $ FLASK_APP=fhost flask prune") print("") exit(1); diff --git a/fhost.py b/fhost.py index a908993..469669a 100755 --- a/fhost.py +++ b/fhost.py @@ -443,22 +443,13 @@ def ehandler(e): return "Segmentation fault\n", e.code @app.cli.command("prune") -@click.option( - '-l', '--legacy', - is_flag=True, - default=False, - help="Also look for legacy files" -) -def prune(legacy: bool): +def prune(): """ Clean up expired files Deletes any files from the filesystem which have hit their expiration time. This doesn't remove them from the database, only from the filesystem. It's recommended that server owners run this command regularly, or set it up on a timer. - - Server owners who recently applied a migration that creates legacy files should run - this script with the --legacy/-l flag enabled for at least FHOST_MAX_EXPIRATION. """ current_time = time.time() * 1000; @@ -503,21 +494,6 @@ def prune(legacy: bool): file.expiration = None; db.session.commit() - # Prior to 0x0 tracking file expiration times, files were removed by scanning the - # filesystem. 
If this system was recently migrated from the old system, there might - # still be files whose expirations aren't tracked and must be noticed the old way. - # Therefore, we perform an additional check on files in the upload directory. - if legacy: - for file in os.listdir(storage): - file_path = storage / file - stat = os.stat(file_path) - file_age = (current_time - stat.st_mtime * 1000) # How long the file has existed, in ms - max_age = get_max_lifespan(stat.st_size) - if file_age > max_age: - print(f"Removing legacy file {file_path}") - os.remove(file_path) - files_removed += 1; - print(f"\nDone! {files_removed} file(s) removed") """ For a file of a given size, determine the largest allowed lifespan of that file diff --git a/migrations/versions/939a08e1d6e5_.py b/migrations/versions/939a08e1d6e5_.py index 8a47428..3b9b418 100644 --- a/migrations/versions/939a08e1d6e5_.py +++ b/migrations/versions/939a08e1d6e5_.py @@ -11,12 +11,73 @@ revision = '939a08e1d6e5' down_revision = '7e246705da6a' from alembic import op +from flask import current_app +from flask_sqlalchemy import SQLAlchemy +from pathlib import Path import sqlalchemy as sa +import os +import time + +""" For a file of a given size, determine the largest allowed lifespan of that file + +Based on the current app's configuration: Specifically, the MAX_CONTENT_LENGTH, as well +as FHOST_{MIN,MAX}_EXPIRATION. + +This lifespan may be shortened by a user's request, but no files should be allowed to +expire at a point after this number. + +Value returned is a duration in milliseconds. 
+""" +def get_max_lifespan(filesize: int) -> int: + min_exp = current_app.config.get("FHOST_MIN_EXPIRATION", 30 * 24 * 60 * 60 * 1000) + max_exp = current_app.config.get("FHOST_MAX_EXPIRATION", 365 * 24 * 60 * 60 * 1000) + max_size = current_app.config.get("MAX_CONTENT_LENGTH", 256 * 1024 * 1024) + return min_exp + int((-max_exp + min_exp) * (filesize / max_size - 1) ** 3) + +db = SQLAlchemy(current_app.__weakref__()) + +# Representations of the original and updated File tables +class File(db.Model): + id = db.Column(db.Integer, primary_key = True) + sha256 = db.Column(db.String, unique = True) + ext = db.Column(db.UnicodeText) + mime = db.Column(db.UnicodeText) + addr = db.Column(db.UnicodeText) + removed = db.Column(db.Boolean, default=False) + nsfw_score = db.Column(db.Float) +UpdatedFile = sa.table('file', + # We only need to describe the columns that are relevent to us + sa.column('id', db.Integer), + sa.column('expiration', db.BigInteger) +) def upgrade(): op.add_column('file', sa.Column('expiration', sa.BigInteger())) + storage = Path(current_app.config["FHOST_STORAGE_PATH"]) + current_time = time.time() * 1000; + + # List of file hashes which have not expired yet + # This could get really big for some servers + unexpired_files = set(os.listdir(storage)) + + # Calculate an expiration date for all existing files + files = File.query\ + .where( + sa.not_(File.removed) + ).all() + for file in files: + if file.sha256 in unexpired_files: + file_path = storage / file.sha256 + stat = os.stat(file_path) + max_age = get_max_lifespan(stat.st_size) # How long the file is allowed to live, in ms + file_birth = stat.st_mtime * 1000 # When the file was created, in ms + op.execute( + sa.update(UpdatedFile) + .where(UpdatedFile.c.id == file.id) + .values({'expiration': int(file_birth + max_age)}) + ) def downgrade(): op.drop_column('file', 'expiration') -- 2.48.1 From d14713d077beceb9264c9be7e933e7ad40a94a5e Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Mon, 28 Nov 2022 
16:14:31 -0500 Subject: [PATCH 05/13] Use automap in place of an explicit file map in migration --- migrations/versions/939a08e1d6e5_.py | 29 +++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/migrations/versions/939a08e1d6e5_.py b/migrations/versions/939a08e1d6e5_.py index 3b9b418..850a7d7 100644 --- a/migrations/versions/939a08e1d6e5_.py +++ b/migrations/versions/939a08e1d6e5_.py @@ -15,6 +15,8 @@ from flask import current_app from flask_sqlalchemy import SQLAlchemy from pathlib import Path import sqlalchemy as sa +from sqlalchemy.ext.automap import automap_base +from sqlalchemy.orm import Session import os import time @@ -37,24 +39,23 @@ def get_max_lifespan(filesize: int) -> int: db = SQLAlchemy(current_app.__weakref__()) -# Representations of the original and updated File tables -class File(db.Model): - id = db.Column(db.Integer, primary_key = True) - sha256 = db.Column(db.String, unique = True) - ext = db.Column(db.UnicodeText) - mime = db.Column(db.UnicodeText) - addr = db.Column(db.UnicodeText) - removed = db.Column(db.Boolean, default=False) - nsfw_score = db.Column(db.Float) +# Representation of the updated (future) File table UpdatedFile = sa.table('file', # We only need to describe the columns that are relevent to us sa.column('id', db.Integer), sa.column('expiration', db.BigInteger) ) +Base = automap_base() + def upgrade(): op.add_column('file', sa.Column('expiration', sa.BigInteger())) + bind = op.get_bind() + Base.prepare(autoload_with=bind) + File = Base.classes.file + session = Session(bind=bind) + storage = Path(current_app.config["FHOST_STORAGE_PATH"]) current_time = time.time() * 1000; @@ -63,10 +64,12 @@ def upgrade(): unexpired_files = set(os.listdir(storage)) # Calculate an expiration date for all existing files - files = File.query\ - .where( - sa.not_(File.removed) - ).all() + files = session.scalars( + sa.select(File) + .where( + sa.not_(File.removed) + ) + ) for file in files: if file.sha256 in 
unexpired_files: file_path = storage / file.sha256 -- 2.48.1 From 39d24e56c30e9924997e80cfdd707dcf4b61b30a Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Mon, 28 Nov 2022 16:16:39 -0500 Subject: [PATCH 06/13] Remove vestigial `touch()` --- fhost.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/fhost.py b/fhost.py index 469669a..07b2437 100755 --- a/fhost.py +++ b/fhost.py @@ -246,8 +246,6 @@ class File(db.Model): if not p.is_file(): with open(p, "wb") as of: of.write(data) - else: - p.touch() if not f.nsfw_score and app.config["NSFW_DETECT"]: f.nsfw_score = nsfw.detect(p) -- 2.48.1 From 64ddfdb7023d32980a7ff26dbf52b683c1177d1f Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Mon, 28 Nov 2022 16:20:43 -0500 Subject: [PATCH 07/13] Don't crash when upgrading a fresh database --- migrations/versions/939a08e1d6e5_.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/migrations/versions/939a08e1d6e5_.py b/migrations/versions/939a08e1d6e5_.py index 850a7d7..4cdfa34 100644 --- a/migrations/versions/939a08e1d6e5_.py +++ b/migrations/versions/939a08e1d6e5_.py @@ -61,7 +61,10 @@ def upgrade(): # List of file hashes which have not expired yet # This could get really big for some servers - unexpired_files = set(os.listdir(storage)) + try: + unexpired_files = set(os.listdir(storage)) + except FileNotFoundError: + return # There are no currently unexpired files # Calculate an expiration date for all existing files files = session.scalars( -- 2.48.1 From 74b6f986ee4d14742c17af02a2cd543f1a8981d3 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Mon, 28 Nov 2022 16:21:03 -0500 Subject: [PATCH 08/13] Remove vestigial warning about legacy files --- migrations/versions/939a08e1d6e5_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migrations/versions/939a08e1d6e5_.py b/migrations/versions/939a08e1d6e5_.py index 4cdfa34..ae70c15 100644 --- a/migrations/versions/939a08e1d6e5_.py +++ b/migrations/versions/939a08e1d6e5_.py @@ -1,4 +1,4 @@ 
-"""add file expirations [creates legacy files] +"""add file expirations Revision ID: 939a08e1d6e5 Revises: 7e246705da6a -- 2.48.1 From 19d989b69681cec34ce0cac700d63c89475f91e5 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Mon, 28 Nov 2022 16:24:26 -0500 Subject: [PATCH 09/13] More efficiently filter to unexpired files when migrating https://git.0x0.st/mia/0x0/pulls/72#issuecomment-224 --- migrations/versions/939a08e1d6e5_.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/migrations/versions/939a08e1d6e5_.py b/migrations/versions/939a08e1d6e5_.py index ae70c15..4245a8b 100644 --- a/migrations/versions/939a08e1d6e5_.py +++ b/migrations/versions/939a08e1d6e5_.py @@ -62,7 +62,7 @@ def upgrade(): # List of file hashes which have not expired yet # This could get really big for some servers try: - unexpired_files = set(os.listdir(storage)) + unexpired_files = os.listdir(storage) except FileNotFoundError: return # There are no currently unexpired files @@ -70,20 +70,20 @@ def upgrade(): files = session.scalars( sa.select(File) .where( - sa.not_(File.removed) + sa.not_(File.removed), + File.sha256.in_(unexpired_files) ) ) for file in files: - if file.sha256 in unexpired_files: - file_path = storage / file.sha256 - stat = os.stat(file_path) - max_age = get_max_lifespan(stat.st_size) # How long the file is allowed to live, in ms - file_birth = stat.st_mtime * 1000 # When the file was created, in ms - op.execute( - sa.update(UpdatedFile) - .where(UpdatedFile.c.id == file.id) - .values({'expiration': int(file_birth + max_age)}) - ) + file_path = storage / file.sha256 + stat = os.stat(file_path) + max_age = get_max_lifespan(stat.st_size) # How long the file is allowed to live, in ms + file_birth = stat.st_mtime * 1000 # When the file was created, in ms + op.execute( + sa.update(UpdatedFile) + .where(UpdatedFile.c.id == file.id) + .values({'expiration': int(file_birth + max_age)}) + ) def downgrade(): op.drop_column('file', 
'expiration') -- 2.48.1 From 55ee3740b0d39144c4cad7896c8b386d89d40c15 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Mon, 28 Nov 2022 16:28:22 -0500 Subject: [PATCH 10/13] Coalesce updates to the database during migration https://git.0x0.st/mia/0x0/pulls/72#issuecomment-226 --- migrations/versions/939a08e1d6e5_.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/migrations/versions/939a08e1d6e5_.py b/migrations/versions/939a08e1d6e5_.py index 4245a8b..004d077 100644 --- a/migrations/versions/939a08e1d6e5_.py +++ b/migrations/versions/939a08e1d6e5_.py @@ -74,16 +74,17 @@ def upgrade(): File.sha256.in_(unexpired_files) ) ) + updates = [] # We coalesce updates to the database here for file in files: file_path = storage / file.sha256 stat = os.stat(file_path) max_age = get_max_lifespan(stat.st_size) # How long the file is allowed to live, in ms file_birth = stat.st_mtime * 1000 # When the file was created, in ms - op.execute( - sa.update(UpdatedFile) - .where(UpdatedFile.c.id == file.id) - .values({'expiration': int(file_birth + max_age)}) - ) + updates.append({'id': file.id, 'expiration': int(file_birth + max_age)}) + + # Apply coalesced updates + session.bulk_update_mappings(File, updates) + session.commit() def downgrade(): op.drop_column('file', 'expiration') -- 2.48.1 From 60db7938c8c10fa93b4a942dac623e2da1ae1440 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Mon, 28 Nov 2022 16:43:34 -0500 Subject: [PATCH 11/13] Remove vestigial database model https://git.0x0.st/mia/0x0/pulls/72#issuecomment-261 --- migrations/versions/939a08e1d6e5_.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/migrations/versions/939a08e1d6e5_.py b/migrations/versions/939a08e1d6e5_.py index 004d077..f86dcb3 100644 --- a/migrations/versions/939a08e1d6e5_.py +++ b/migrations/versions/939a08e1d6e5_.py @@ -37,15 +37,6 @@ def get_max_lifespan(filesize: int) -> int: max_size = current_app.config.get("MAX_CONTENT_LENGTH", 256 * 1024 * 1024) return min_exp 
+ int((-max_exp + min_exp) * (filesize / max_size - 1) ** 3) -db = SQLAlchemy(current_app.__weakref__()) - -# Representation of the updated (future) File table -UpdatedFile = sa.table('file', - # We only need to describe the columns that are relevent to us - sa.column('id', db.Integer), - sa.column('expiration', db.BigInteger) -) - Base = automap_base() def upgrade(): -- 2.48.1 From 11cfd07d711ff8ae2d301fda19dfa08a0712d151 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Mon, 28 Nov 2022 16:49:09 -0500 Subject: [PATCH 12/13] prune: Stream expired files from the database (as opposed to collecting them all first) --- fhost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fhost.py b/fhost.py index 07b2437..bda680f 100755 --- a/fhost.py +++ b/fhost.py @@ -461,7 +461,7 @@ def prune(): File.expiration.is_not(None), File.expiration < current_time ) - ).all() + ) files_removed = 0; -- 2.48.1 From 7bb18b17dcb5280439a085a418eb688138f0b89f Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Mon, 28 Nov 2022 17:09:18 -0500 Subject: [PATCH 13/13] config.example.py: Add min & max expiration + description --- instance/config.example.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/instance/config.example.py b/instance/config.example.py index 64c977c..019ec11 100644 --- a/instance/config.example.py +++ b/instance/config.example.py @@ -45,6 +45,19 @@ MAX_CONTENT_LENGTH = 256 * 1024 * 1024 # Default: 256MiB MAX_URL_LENGTH = 4096 +# The minimum and maximum amount of time we'll retain a file for +# +# Small files (nearing zero bytes) are stored for the longest possible expiration date, +# while larger files (nearing MAX_CONTENT_LENGTH bytes) are stored for the shortest amount +# of time. Values between these two extremes are interpolated with a cubic curve, +# like the one shown on the index page. +# +# All times are in milliseconds. If you want all files to be stored for the same amount +# of time, set these to the same value. 
+FHOST_MIN_EXPIRATION = 30 * 24 * 60 * 60 * 1000 +FHOST_MAX_EXPIRATION = 365 * 24 * 60 * 60 * 1000 + + # Use the X-SENDFILE header to speed up serving files w/ compatible webservers # # Some webservers can be configured use the X-Sendfile header to handle sending -- 2.48.1