diff --git a/README.rst b/README.rst index f5270e4..b123e32 100644 --- a/README.rst +++ b/README.rst @@ -27,8 +27,8 @@ For all other servers, set ``FHOST_USE_X_ACCEL_REDIRECT`` to ``False`` and Otherwise, Flask will serve the file with chunked encoding, which sucks and should be avoided at all costs. -To make files expire, simply create a cronjob that runs ``cleanup.py`` every -now and then. +To make files expire, simply create a cronjob that runs ``FLASK_APP=fhost +flask prune`` every now and then. Before running the service for the first time, run ``FLASK_APP=fhost flask db upgrade``. diff --git a/cleanup.py b/cleanup.py index 0f9a5ce..cc998df 100755 --- a/cleanup.py +++ b/cleanup.py @@ -1,44 +1,8 @@ #!/usr/bin/env python3 -""" - Copyright © 2020 Mia Herkt - Licensed under the EUPL, Version 1.2 or - as soon as approved - by the European Commission - subsequent versions of the EUPL - (the "License"); - You may not use this work except in compliance with the License. - You may obtain a copy of the license at: - - https://joinup.ec.europa.eu/software/page/eupl - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - either express or implied. - See the License for the specific language governing permissions - and limitations under the License. -""" - -import os -import sys -import time -import datetime -from fhost import app - -os.chdir(os.path.dirname(sys.argv[0])) -os.chdir(app.config["FHOST_STORAGE_PATH"]) - -files = [f for f in os.listdir(".")] - -maxs = app.config["MAX_CONTENT_LENGTH"] -mind = 30 -maxd = 365 - -for f in files: - stat = os.stat(f) - systime = time.time() - age = datetime.timedelta(seconds=(systime - stat.st_mtime)).days - - maxage = mind + (-maxd + mind) * (stat.st_size / maxs - 1) ** 3 - - if age >= maxage: - os.remove(f) +print("This script has been replaced!!") +print("Instead, please run") +print("") +print(" $ FLASK_APP=fhost flask prune --legacy") +print("") +exit(1); diff --git a/fhost.py b/fhost.py index 9c2b94b..a908993 100755 --- a/fhost.py +++ b/fhost.py @@ -22,12 +22,17 @@ from flask import Flask, abort, make_response, redirect, request, send_from_directory, url_for, Response, render_template from flask_sqlalchemy import SQLAlchemy from flask_migrate import Migrate +from sqlalchemy import and_ from jinja2.exceptions import * from jinja2 import ChoiceLoader, FileSystemLoader from hashlib import sha256 from magic import Magic from mimetypes import guess_extension +import click +import os import sys +import time +import typing import requests from validators import url as url_valid from pathlib import Path @@ -121,12 +126,14 @@ class File(db.Model): addr = db.Column(db.UnicodeText) removed = db.Column(db.Boolean, default=False) nsfw_score = db.Column(db.Float) + expiration = db.Column(db.BigInteger) - def __init__(self, sha256, ext, mime, addr): + def __init__(self, sha256, ext, mime, addr, expiration): self.sha256 = sha256 self.ext = ext self.mime = mime self.addr = addr + self.expiration = expiration def getname(self): return u"{0}{1}".format(su.enbase(self.id), self.ext) @@ -139,7 +146,16 @@ class File(db.Model): else: return url_for("get", path=n, _external=True) + "\n" - def store(file_, addr): + """ + requested_expiration can be: + - None, to use the longest allowed file lifespan + - a duration (in hours) that the file should live for + - a timestamp in epoch millis that the file should expire at + + Any value greater that the longest allowed file lifespan will be rounded down to that + value. + """ + def store(file_, requested_expiration: typing.Optional[int], addr): data = file_.read() digest = sha256(data).hexdigest() @@ -175,15 +191,51 @@ class File(db.Model): return ext[:app.config["FHOST_MAX_EXT_LENGTH"]] or ".bin" - f = File.query.filter_by(sha256=digest).first() + # Returns the epoch millisecond that this file should expire + # + # Uses the expiration time provided by the user (requested_expiration) + # upper-bounded by an algorithm that computes the size based on the size of the + # file. + # + # That is, all files are assigned a computed expiration, which can voluntarily + # shortened by the user either by providing a timestamp in epoch millis or a + # duration in hours. + def get_expiration() -> int: + current_epoch_millis = time.time() * 1000; + # Maximum lifetime of the file in milliseconds + this_files_max_lifespan = get_max_lifespan(len(data)); + + # The latest allowed expiration date for this file, in epoch millis + this_files_max_expiration = this_files_max_lifespan + 1000 * time.time(); + + if requested_expiration is None: + return this_files_max_expiration + elif requested_expiration < 1650460320000: + # Treat the requested expiration time as a duration in hours + requested_expiration_ms = requested_expiration * 60 * 60 * 1000 + return min(this_files_max_expiration, current_epoch_millis + requested_expiration_ms) + else: + # Treat the requested expiration time as a timestamp in epoch millis + return min(this_files_max_expiration, requested_expiration); + + f = File.query.filter_by(sha256=digest).first() if f: + # If the file already exists if f.removed: + # The file was removed by moderation, so don't accept it back abort(451) + if f.expiration is None: + # The file has expired, so give it a new expiration date + f.expiration = get_expiration() + else: + # The file already exists, update the expiration if needed + f.expiration = max(f.expiration, get_expiration()) else: mime = get_mime() ext = get_ext(mime) - f = File(digest, ext, mime, addr) + expiration = get_expiration() + f = File(digest, ext, mime, addr, expiration) f.addr = addr @@ -260,11 +312,20 @@ def in_upload_bl(addr): return False -def store_file(f, addr): +""" +requested_expiration can be: + - None, to use the longest allowed file lifespan + - a duration (in hours) that the file should live for + - a timestamp in epoch millis that the file should expire at + +Any value greater that the longest allowed file lifespan will be rounded down to that +value. +""" +def store_file(f, requested_expiration: typing.Optional[int], addr): if in_upload_bl(addr): return "Your host is blocked from uploading files.\n", 451 - sf = File.store(f, addr) + sf = File.store(f, requested_expiration, addr) return sf.geturl() @@ -289,7 +350,7 @@ def store_url(url, addr): f = urlfile(read=r.raw.read, content_type=r.headers["content-type"], filename="") - return store_file(f, addr) + return store_file(f, None, addr) else: abort(413) else: @@ -336,7 +397,23 @@ def fhost(): sf = None if "file" in request.files: - return store_file(request.files["file"], request.remote_addr) + try: + # Store the file with the requested expiration date + return store_file( + request.files["file"], + int(request.form["expires"]), + request.remote_addr + ) + except ValueError: + # The requested expiration date wasn't properly formed + abort(400) + except KeyError: + # No expiration date was requested, store with the max lifespan + return store_file( + request.files["file"], + None, + request.remote_addr + ) elif "url" in request.form: return store_url(request.form["url"], request.remote_addr) elif "shorten" in request.form: @@ -364,3 +441,97 @@ def ehandler(e): return render_template(f"{e.code}.html", id=id), e.code except TemplateNotFound: return "Segmentation fault\n", e.code + +@app.cli.command("prune") +@click.option( + '-l', '--legacy', + is_flag=True, + default=False, + help="Also look for legacy files" +) +def prune(legacy: bool): + """ + Clean up expired files + + Deletes any files from the filesystem which have hit their expiration time. This + doesn't remove them from the database, only from the filesystem. It's recommended + that server owners run this command regularly, or set it up on a timer. + + Server owners who recently applied a migration that creates legacy files should run + this script with the --legacy/-l flag enabled for at least FHOST_MAX_EXPIRATION. + """ + current_time = time.time() * 1000; + + # The path to where uploaded files are stored + storage = Path(app.config["FHOST_STORAGE_PATH"]) + + # A list of all files who've passed their expiration times + expired_files = File.query\ + .where( + and_( + File.expiration.is_not(None), + File.expiration < current_time + ) + ).all() + + files_removed = 0; + + # For every expired file... + for file in expired_files: + # Log the file we're about to remove + file_name = file.getname() + file_hash = file.sha256 + file_path = storage / file_hash + print(f"Removing expired file {file_name} [{file_hash}]") + + # Remove it from the file system + try: + os.remove(file_path) + files_removed += 1; + except FileNotFoundError: + pass # If the file was already gone, we're good + except OSError as e: + print(e) + print( + "\n------------------------------------" + "Encountered an error while trying to remove file {file_path}. Double" + "check to make sure the server is configured correctly, permissions are" + "okay, and everything is ship shape, then try again.") + return; + + # Finally, mark that the file was removed + file.expiration = None; + db.session.commit() + + # Prior to 0x0 tracking file expiration times, files were removed by scanning the + # filesystem. If this system was recently migrated from the old system, there might + # still be files whose expirations aren't tracked and must be noticed the old way. + # Therefore, we perform an additional check on files in the upload directory. + if legacy: + for file in os.listdir(storage): + file_path = storage / file + stat = os.stat(file_path) + file_age = (current_time - stat.st_mtime * 1000) # How long the file has existed, in ms + max_age = get_max_lifespan(stat.st_size) + if file_age > max_age: + print(f"Removing legacy file {file_path}") + os.remove(file_path) + files_removed += 1; + + print(f"\nDone! {files_removed} file(s) removed") + +""" For a file of a given size, determine the largest allowed lifespan of that file + +Based on the current app's configuration: Specifically, the MAX_CONTENT_LENGTH, as well +as FHOST_{MIN,MAX}_EXPIRATION. + +This lifespan may be shortened by a user's request, but no files should be allowed to +expire at a point after this number. + +Value returned is a duration in milliseconds. +""" +def get_max_lifespan(filesize: int) -> int: + min_exp = app.config.get("FHOST_MIN_EXPIRATION", 30 * 24 * 60 * 60 * 1000) + max_exp = app.config.get("FHOST_MAX_EXPIRATION", 365 * 24 * 60 * 60 * 1000) + max_size = app.config.get("MAX_CONTENT_LENGTH", 256 * 1024 * 1024) + return min_exp + int((-max_exp + min_exp) * (filesize / max_size - 1) ** 3) diff --git a/migrations/versions/939a08e1d6e5_.py b/migrations/versions/939a08e1d6e5_.py new file mode 100644 index 0000000..8a47428 --- /dev/null +++ b/migrations/versions/939a08e1d6e5_.py @@ -0,0 +1,22 @@ +"""add file expirations [creates legacy files] + +Revision ID: 939a08e1d6e5 +Revises: 7e246705da6a +Create Date: 2022-11-22 12:16:32.517184 + +""" + +# revision identifiers, used by Alembic. +revision = '939a08e1d6e5' +down_revision = '7e246705da6a' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + op.add_column('file', sa.Column('expiration', sa.BigInteger())) + + +def downgrade(): + op.drop_column('file', 'expiration') diff --git a/templates/index.html b/templates/index.html index cef9de2..b115d52 100644 --- a/templates/index.html +++ b/templates/index.html @@ -11,6 +11,12 @@ Or you can shorten URLs: File URLs are valid for at least 30 days and up to a year (see below). Shortened URLs do not expire. + +Files can be set to expire sooner by adding an "expires" parameter (in hours) + curl -F'file=@yourfile.png' -F'expires=24' {{ fhost_url }} +OR by setting "expires" to a timestamp in epoch milliseconds + curl -F'file=@yourfile.png' -F'expires=1681996320000' {{ fhost_url }} + {% set max_size = config["MAX_CONTENT_LENGTH"]|filesizeformat(True) %} Maximum file size: {{ max_size }} Not allowed: {{ config["FHOST_MIME_BLACKLIST"]|join(", ") }}