Compare commits

...

6 commits

Author SHA1 Message Date
a2b322f868
Avoid holding in-memory copies of file content
Werkzeug uses tempfile.SpooledTemporaryFile, so we can make use of
file-like object properties. This may result in more disk writes,
but that’s probably better than eating up RAM.

I hope this fixes #84.
2024-09-27 20:45:42 +02:00
f65bccc2aa
Remove blacklist reference from default index template 2024-09-27 19:15:57 +02:00
de19212a71
PEP8 compliance 2024-09-27 18:30:33 +02:00
a2147cc964
Remove broken tests
Will be readded after some major refactoring and modernization.
2024-09-27 18:30:31 +02:00
45a414c5ee
Implement request filters
This moves preexisting blacklists to the database, and adds the
following filter types:

    * IP address
    * IP network
    * MIME type
    * User agent

In addition, IP address handling is now done with the ipaddress
module.
2024-09-27 18:30:28 +02:00
6393538333
Replace NSFW detector implementation 2024-09-27 06:34:14 +02:00
29 changed files with 717 additions and 3988 deletions

View file

@ -95,12 +95,15 @@ Optional:
NSFW Detection
--------------
0x0 supports classification of NSFW content via Yahoos open_nsfw Caffe
neural network model. This works for images and video files and requires
the following:
0x0 supports classification of NSFW content via
`a machine learning model <https://huggingface.co/giacomoarienti/nsfw-classifier>`_.
This works for images and video files and requires the following
Python modules:
* Caffe Python module (built for Python 3)
* `PyAV <https://github.com/PyAV-Org/PyAV>`_
* torch
* transformers
* pillow
* `av <https://github.com/PyAV-Org/PyAV>`_
Virus Scanning

View file

@ -5,4 +5,4 @@ print("Instead, please run")
print("")
print(" $ FLASK_APP=fhost flask prune")
print("")
exit(1);
exit(1)

472
fhost.py
View file

@ -1,8 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Copyright © 2020 Mia Herkt
Copyright © 2024 Mia Herkt
Licensed under the EUPL, Version 1.2 or - as soon as approved
by the European Commission - subsequent versions of the EUPL
(the "License");
@ -19,77 +18,79 @@
and limitations under the License.
"""
from flask import Flask, abort, make_response, redirect, request, send_from_directory, url_for, Response, render_template
from flask import Flask, abort, make_response, redirect, render_template, \
Request, request, Response, send_from_directory, url_for
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate
from sqlalchemy import and_, or_
from sqlalchemy.orm import declared_attr
import sqlalchemy.types as types
from jinja2.exceptions import *
from jinja2 import ChoiceLoader, FileSystemLoader
from hashlib import sha256
from hashlib import file_digest
from magic import Magic
from mimetypes import guess_extension
import click
import enum
import os
import sys
import time
import datetime
import ipaddress
import typing
import requests
import secrets
import re
from validators import url as url_valid
from pathlib import Path
app = Flask(__name__, instance_relative_config=True)
app.config.update(
SQLALCHEMY_TRACK_MODIFICATIONS = False,
PREFERRED_URL_SCHEME = "https", # nginx users: make sure to have 'uwsgi_param UWSGI_SCHEME $scheme;' in your config
MAX_CONTENT_LENGTH = 256 * 1024 * 1024,
MAX_URL_LENGTH = 4096,
USE_X_SENDFILE = False,
FHOST_USE_X_ACCEL_REDIRECT = True, # expect nginx by default
FHOST_STORAGE_PATH = "up",
FHOST_MAX_EXT_LENGTH = 9,
FHOST_SECRET_BYTES = 16,
FHOST_EXT_OVERRIDE = {
"audio/flac" : ".flac",
"image/gif" : ".gif",
"image/jpeg" : ".jpg",
"image/png" : ".png",
"image/svg+xml" : ".svg",
"video/webm" : ".webm",
"video/x-matroska" : ".mkv",
"application/octet-stream" : ".bin",
"text/plain" : ".log",
"text/plain" : ".txt",
"text/x-diff" : ".diff",
SQLALCHEMY_TRACK_MODIFICATIONS=False,
PREFERRED_URL_SCHEME="https", # nginx users: make sure to have
# 'uwsgi_param UWSGI_SCHEME $scheme;' in
# your config
MAX_CONTENT_LENGTH=256 * 1024 * 1024,
MAX_URL_LENGTH=4096,
USE_X_SENDFILE=False,
FHOST_USE_X_ACCEL_REDIRECT=True, # expect nginx by default
FHOST_STORAGE_PATH="up",
FHOST_MAX_EXT_LENGTH=9,
FHOST_SECRET_BYTES=16,
FHOST_EXT_OVERRIDE={
"audio/flac": ".flac",
"image/gif": ".gif",
"image/jpeg": ".jpg",
"image/png": ".png",
"image/svg+xml": ".svg",
"video/webm": ".webm",
"video/x-matroska": ".mkv",
"application/octet-stream": ".bin",
"text/plain": ".log",
"text/plain": ".txt",
"text/x-diff": ".diff",
},
FHOST_MIME_BLACKLIST = [
"application/x-dosexec",
"application/java-archive",
"application/java-vm"
],
FHOST_UPLOAD_BLACKLIST = None,
NSFW_DETECT = False,
NSFW_THRESHOLD = 0.608,
VSCAN_SOCKET = None,
VSCAN_QUARANTINE_PATH = "quarantine",
VSCAN_IGNORE = [
NSFW_DETECT=False,
NSFW_THRESHOLD=0.92,
VSCAN_SOCKET=None,
VSCAN_QUARANTINE_PATH="quarantine",
VSCAN_IGNORE=[
"Eicar-Test-Signature",
"PUA.Win.Packer.XmMusicFile",
],
VSCAN_INTERVAL = datetime.timedelta(days=7),
URL_ALPHABET = "DEQhd2uFteibPwq0SWBInTpA_jcZL5GKz3YCR14Ulk87Jors9vNHgfaOmMXy6Vx-",
VSCAN_INTERVAL=datetime.timedelta(days=7),
URL_ALPHABET="DEQhd2uFteibPwq0SWBInTpA_jcZL5GKz3YCR14Ulk87Jors9vNHgfaOmMX"
"y6Vx-",
)
if not app.config["TESTING"]:
app.config.from_pyfile("config.py")
app.jinja_loader = ChoiceLoader([
FileSystemLoader(str(Path(app.instance_path) / "templates")),
app.jinja_loader
])
app.config.from_pyfile("config.py")
app.jinja_loader = ChoiceLoader([
FileSystemLoader(str(Path(app.instance_path) / "templates")),
app.jinja_loader
])
if app.config["DEBUG"]:
app.config["FHOST_USE_X_ACCEL_REDIRECT"] = False
if app.config["DEBUG"]:
app.config["FHOST_USE_X_ACCEL_REDIRECT"] = False
if app.config["NSFW_DETECT"]:
from nsfw_detect import NSFWDetector
@ -97,7 +98,7 @@ if app.config["NSFW_DETECT"]:
try:
mimedetect = Magic(mime=True, mime_encoding=False)
except:
except TypeError:
print("""Error: You have installed the wrong version of the 'magic' module.
Please install python-magic.""")
sys.exit(1)
@ -105,10 +106,11 @@ Please install python-magic.""")
db = SQLAlchemy(app)
migrate = Migrate(app, db)
class URL(db.Model):
__tablename__ = "URL"
id = db.Column(db.Integer, primary_key = True)
url = db.Column(db.UnicodeText, unique = True)
id = db.Column(db.Integer, primary_key=True)
url = db.Column(db.UnicodeText, unique=True)
def __init__(self, url):
self.url = url
@ -119,6 +121,7 @@ class URL(db.Model):
def geturl(self):
return url_for("get", path=self.getname(), _external=True) + "\n"
@staticmethod
def get(url):
u = URL.query.filter_by(url=url).first()
@ -129,12 +132,35 @@ class URL(db.Model):
return u
class IPAddress(types.TypeDecorator):
    """
    Column type that keeps IP addresses as packed bytes in a LargeBinary.

    IPv4-mapped IPv6 addresses are reduced to their plain IPv4 form both
    when a value is bound and when a stored value is loaded.
    """
    impl = types.LargeBinary
    cache_ok = True

    def process_bind_param(self, value, dialect):
        # Only ipaddress objects are converted; anything else (None, raw
        # bytes, ...) is handed to the driver unchanged.
        if isinstance(value, ipaddress.IPv6Address):
            return (value.ipv4_mapped or value).packed
        if isinstance(value, ipaddress.IPv4Address):
            return value.packed
        return value

    def process_result_value(self, value, dialect):
        if value is None:
            return None

        addr = ipaddress.ip_address(value)
        if type(addr) is ipaddress.IPv6Address:
            # Hand back the embedded IPv4 address when there is one.
            return addr.ipv4_mapped or addr
        return addr
class File(db.Model):
id = db.Column(db.Integer, primary_key = True)
sha256 = db.Column(db.String, unique = True)
id = db.Column(db.Integer, primary_key=True)
sha256 = db.Column(db.String, unique=True)
ext = db.Column(db.UnicodeText)
mime = db.Column(db.UnicodeText)
addr = db.Column(db.UnicodeText)
addr = db.Column(IPAddress(16))
ua = db.Column(db.UnicodeText)
removed = db.Column(db.Boolean, default=False)
nsfw_score = db.Column(db.Float)
@ -155,18 +181,19 @@ class File(db.Model):
@property
def is_nsfw(self) -> bool:
return self.nsfw_score and self.nsfw_score > app.config["NSFW_THRESHOLD"]
if self.nsfw_score:
return self.nsfw_score > app.config["NSFW_THRESHOLD"]
return False
def getname(self):
return u"{0}{1}".format(su.enbase(self.id), self.ext)
def geturl(self):
n = self.getname()
a = "nsfw" if self.is_nsfw else None
if self.is_nsfw:
return url_for("get", path=n, secret=self.secret, _external=True, _anchor="nsfw") + "\n"
else:
return url_for("get", path=n, secret=self.secret, _external=True) + "\n"
return url_for("get", path=n, secret=self.secret,
_external=True, _anchor=a) + "\n"
def getpath(self) -> Path:
return Path(app.config["FHOST_STORAGE_PATH"]) / self.sha256
@ -177,33 +204,37 @@ class File(db.Model):
self.removed = permanent
self.getpath().unlink(missing_ok=True)
# Returns the epoch millisecond that a file should expire
#
# Uses the expiration time provided by the user (requested_expiration)
# upper-bounded by an algorithm that computes the size based on the size of the
# file.
#
# That is, all files are assigned a computed expiration, which can voluntarily
# shortened by the user either by providing a timestamp in epoch millis or a
# duration in hours.
"""
Returns the time, in milliseconds since the epoch, at which a file should
expire
Uses the expiration time provided by the user (requested_expiration)
upper-bounded by an algorithm that computes the maximum lifespan based on
the size of the file.
That is, all files are assigned a computed expiration, which can be
voluntarily shortened by the user either by providing a timestamp in
milliseconds since epoch or a duration in hours.
"""
@staticmethod
def get_expiration(requested_expiration, size) -> int:
current_epoch_millis = time.time() * 1000;
current_epoch_millis = time.time() * 1000
# Maximum lifetime of the file in milliseconds
this_files_max_lifespan = get_max_lifespan(size);
max_lifespan = get_max_lifespan(size)
# The latest allowed expiration date for this file, in epoch millis
this_files_max_expiration = this_files_max_lifespan + 1000 * time.time();
max_expiration = max_lifespan + 1000 * time.time()
if requested_expiration is None:
return this_files_max_expiration
return max_expiration
elif requested_expiration < 1650460320000:
# Treat the requested expiration time as a duration in hours
requested_expiration_ms = requested_expiration * 60 * 60 * 1000
return min(this_files_max_expiration, current_epoch_millis + requested_expiration_ms)
return min(max_expiration,
current_epoch_millis + requested_expiration_ms)
else:
# Treat the requested expiration time as a timestamp in epoch millis
return min(this_files_max_expiration, requested_expiration)
# Treat expiration time as a timestamp in epoch millis
return min(max_expiration, requested_expiration)
"""
requested_expiration can be:
@ -211,29 +242,38 @@ class File(db.Model):
- a duration (in hours) that the file should live for
- a timestamp in epoch millis that the file should expire at
Any value greater that the longest allowed file lifespan will be rounded down to that
value.
Any value greater than the longest allowed file lifespan will be rounded
down to that value.
"""
def store(file_, requested_expiration: typing.Optional[int], addr, ua, secret: bool):
data = file_.read()
digest = sha256(data).hexdigest()
@staticmethod
def store(file_, requested_expiration: typing.Optional[int], addr, ua,
secret: bool):
fstream = file_.stream
digest = file_digest(fstream, "sha256").hexdigest()
fstream.seek(0, os.SEEK_END)
flen = fstream.tell()
fstream.seek(0)
def get_mime():
guess = mimedetect.from_buffer(data)
app.logger.debug(f"MIME - specified: '{file_.content_type}' - detected: '{guess}'")
guess = mimedetect.from_descriptor(fstream.fileno())
app.logger.debug(f"MIME - specified: '{file_.content_type}' - "
f"detected: '{guess}'")
if not file_.content_type or not "/" in file_.content_type or file_.content_type == "application/octet-stream":
if (not file_.content_type
or "/" not in file_.content_type
or file_.content_type == "application/octet-stream"):
mime = guess
else:
mime = file_.content_type
if mime in app.config["FHOST_MIME_BLACKLIST"] or guess in app.config["FHOST_MIME_BLACKLIST"]:
abort(415)
if len(mime) > 128:
abort(400)
if mime.startswith("text/") and not "charset" in mime:
for flt in MIMEFilter.query.all():
if flt.check(guess):
abort(403, flt.reason)
if mime.startswith("text/") and "charset" not in mime:
mime += "; charset=utf-8"
return mime
@ -245,7 +285,8 @@ class File(db.Model):
gmime = mime.split(";")[0]
guess = guess_extension(gmime)
app.logger.debug(f"extension - specified: '{ext}' - detected: '{guess}'")
app.logger.debug(f"extension - specified: '{ext}' - detected: "
f"'{guess}'")
if not ext:
if gmime in app.config["FHOST_EXT_OVERRIDE"]:
@ -257,7 +298,7 @@ class File(db.Model):
return ext[:app.config["FHOST_MAX_EXT_LENGTH"]] or ".bin"
expiration = File.get_expiration(requested_expiration, len(data))
expiration = File.get_expiration(requested_expiration, flen)
isnew = True
f = File.query.filter_by(sha256=digest).first()
@ -288,17 +329,17 @@ class File(db.Model):
if isnew:
f.secret = None
if secret:
f.secret = secrets.token_urlsafe(app.config["FHOST_SECRET_BYTES"])
f.secret = \
secrets.token_urlsafe(app.config["FHOST_SECRET_BYTES"])
storage = Path(app.config["FHOST_STORAGE_PATH"])
storage.mkdir(parents=True, exist_ok=True)
p = storage / digest
if not p.is_file():
with open(p, "wb") as of:
of.write(data)
file_.save(p)
f.size = len(data)
f.size = flen
if not f.nsfw_score and app.config["NSFW_DETECT"]:
f.nsfw_score = nsfw.detect(str(p))
@ -308,8 +349,129 @@ class File(db.Model):
return f, isnew
class RequestFilter(db.Model):
    """
    Base model for upload request filters.

    All filter kinds live in the single "request_filter" table; the
    ``type`` column is the polymorphic discriminator and each subclass
    registers its own identity.
    """
    __tablename__ = "request_filter"

    id = db.Column(db.Integer, primary_key=True)
    type = db.Column(db.String(20), index=True, nullable=False)
    comment = db.Column(db.UnicodeText)

    __mapper_args__ = dict(
        polymorphic_on=type,
        with_polymorphic="*",
        polymorphic_identity="empty",
    )

    def __init__(self, comment: str = None):
        # Free-form note attached to the filter row.
        self.comment = comment
class AddrFilter(RequestFilter):
    """Request filter that matches one specific IP address."""
    addr = db.Column(IPAddress(16), unique=True)

    __mapper_args__ = {"polymorphic_identity": "addr"}

    def __init__(self, addr: ipaddress._BaseAddress, comment: str = None):
        self.addr = addr
        super().__init__(comment=comment)

    def check(self, addr: ipaddress._BaseAddress) -> bool:
        """Return True if addr equals the filtered address."""
        candidate = addr
        if type(candidate) is ipaddress.IPv6Address:
            # Compare IPv4-mapped IPv6 addresses by their IPv4 form.
            candidate = candidate.ipv4_mapped or candidate
        return candidate == self.addr

    def check_request(self, r: Request) -> bool:
        """Apply check() to the remote address of request r."""
        remote = ipaddress.ip_address(r.remote_addr)
        return self.check(remote)

    @property
    def reason(self) -> str:
        """Message shown to a client rejected by this filter."""
        blocked = self.addr.compressed
        return f"Your IP Address ({blocked}) is blocked from uploading files."
class IPNetwork(types.TypeDecorator):
    """
    Column type that stores an IP network as text, using the network
    object's ``compressed`` representation.
    """
    impl = types.Text
    cache_ok = True

    def process_bind_param(self, value, dialect):
        # None stays None; everything else is stored as compressed text.
        return None if value is None else value.compressed

    def process_result_value(self, value, dialect):
        # Rebuild the network object from its stored text form.
        return None if value is None else ipaddress.ip_network(value)
class NetFilter(RequestFilter):
    """Request filter that matches every address inside an IP network."""
    net = db.Column(IPNetwork)

    __mapper_args__ = {"polymorphic_identity": "net"}

    def __init__(self, net: ipaddress._BaseNetwork, comment: str = None):
        self.net = net
        super().__init__(comment=comment)

    def check(self, addr: ipaddress._BaseAddress) -> bool:
        """Return True if addr lies within the filtered network."""
        candidate = addr
        if type(candidate) is ipaddress.IPv6Address:
            # Test IPv4-mapped IPv6 addresses by their IPv4 form.
            candidate = candidate.ipv4_mapped or candidate
        return candidate in self.net

    def check_request(self, r: Request) -> bool:
        """Apply check() to the remote address of request r."""
        remote = ipaddress.ip_address(r.remote_addr)
        return self.check(remote)

    @property
    def reason(self) -> str:
        """Message shown to a client rejected by this filter."""
        blocked = self.net.compressed
        return f"Your network ({blocked}) is blocked from uploading files."
class HasRegex:
    """Mixin for filters that carry a regular expression column."""

    @declared_attr
    def regex(cls):
        # Reuse the table's existing "regex" column if there is one so
        # that several subclasses can share it; otherwise create it.
        columns = cls.__table__.c
        return columns.get("regex", db.Column(db.UnicodeText))

    def check(self, s: str) -> bool:
        """Return True if the stored pattern matches at the start of s."""
        found = re.match(self.regex, s)
        return found is not None
class MIMEFilter(HasRegex, RequestFilter):
    """Request filter that matches uploads by MIME type pattern."""
    __mapper_args__ = {"polymorphic_identity": "mime"}

    def __init__(self, mime_regex: str, comment: str = None):
        self.regex = mime_regex
        super().__init__(comment=comment)

    def check_request(self, r: Request) -> bool:
        """Check the MIME type of the uploaded "file" field, if present."""
        if "file" not in r.files:
            # Nothing was uploaded, so there is nothing to match.
            return False
        upload = r.files["file"]
        return self.check(upload.mimetype)

    @property
    def reason(self) -> str:
        """Message shown to a client rejected by this filter."""
        return "File MIME type not allowed."
class UAFilter(HasRegex, RequestFilter):
    """Request filter that matches requests by user agent pattern."""
    __mapper_args__ = {"polymorphic_identity": "ua"}

    def __init__(self, ua_regex: str, comment: str = None):
        self.regex = ua_regex
        super().__init__(comment=comment)

    def check_request(self, r: Request) -> bool:
        """Check the request's user agent string against the pattern."""
        agent = r.user_agent.string
        return self.check(agent)

    @property
    def reason(self) -> str:
        """Message shown to a client rejected by this filter."""
        return "User agent not allowed."
class UrlEncoder(object):
def __init__(self,alphabet, min_length):
def __init__(self, alphabet, min_length):
self.alphabet = alphabet
self.min_length = min_length
@ -329,17 +491,21 @@ class UrlEncoder(object):
result += self.alphabet.index(c) * (n ** i)
return result
su = UrlEncoder(alphabet=app.config["URL_ALPHABET"], min_length=1)
def fhost_url(scheme=None):
    """
    Return the external URL of the index endpoint without trailing slash.

    A truthy scheme is forwarded to url_for as _scheme; otherwise url_for
    picks the scheme itself.
    """
    extra = {"_scheme": scheme} if scheme else {}
    return url_for(".fhost", _external=True, **extra).rstrip("/")
def is_fhost_url(url):
    """Return True if url starts with this instance's own base URL."""
    # Check the default-scheme base URL as well as the explicit https one.
    own_prefixes = (fhost_url(), fhost_url("https"))
    return url.startswith(own_prefixes)
def shorten(url):
if len(url) > app.config["MAX_URL_LENGTH"]:
abort(414)
@ -351,16 +517,6 @@ def shorten(url):
return u.geturl()
def in_upload_bl(addr):
if app.config["FHOST_UPLOAD_BLACKLIST"]:
with app.open_instance_resource(app.config["FHOST_UPLOAD_BLACKLIST"], "r") as bl:
check = addr.lstrip("::ffff:")
for l in bl.readlines():
if not l.startswith("#"):
if check == l.rstrip():
return True
return False
"""
requested_expiration can be:
@ -368,13 +524,11 @@ requested_expiration can be:
- a duration (in hours) that the file should live for
- a timestamp in epoch millis that the file should expire at
Any value greater that the longest allowed file lifespan will be rounded down to that
value.
Any value greater than the longest allowed file lifespan will be rounded down
to that value.
"""
def store_file(f, requested_expiration: typing.Optional[int], addr, ua, secret: bool):
if in_upload_bl(addr):
return "Your host is blocked from uploading files.\n", 451
def store_file(f, requested_expiration: typing.Optional[int], addr, ua,
secret: bool):
sf, isnew = File.store(f, requested_expiration, addr, ua, secret)
response = make_response(sf.geturl())
@ -385,11 +539,12 @@ def store_file(f, requested_expiration: typing.Optional[int], addr, ua, secret:
return response
def store_url(url, addr, ua, secret: bool):
if is_fhost_url(url):
abort(400)
h = { "Accept-Encoding" : "identity" }
h = {"Accept-Encoding": "identity"}
r = requests.get(url, stream=True, verify=False, headers=h)
try:
@ -398,13 +553,14 @@ def store_url(url, addr, ua, secret: bool):
return str(e) + "\n"
if "content-length" in r.headers:
l = int(r.headers["content-length"])
length = int(r.headers["content-length"])
if l <= app.config["MAX_CONTENT_LENGTH"]:
if length <= app.config["MAX_CONTENT_LENGTH"]:
def urlfile(**kwargs):
return type('',(),kwargs)()
return type('', (), kwargs)()
f = urlfile(read=r.raw.read, content_type=r.headers["content-type"], filename="")
f = urlfile(read=r.raw.read,
content_type=r.headers["content-type"], filename="")
return store_file(f, None, addr, ua, secret)
else:
@ -412,10 +568,9 @@ def store_url(url, addr, ua, secret: bool):
else:
abort(411)
def manage_file(f):
try:
assert(request.form["token"] == f.mgmt_token)
except:
if request.form["token"] != f.mgmt_token:
abort(401)
if "delete" in request.form:
@ -434,6 +589,7 @@ def manage_file(f):
abort(400)
@app.route("/<path:path>", methods=["GET", "POST"])
@app.route("/s/<secret>/<path:path>", methods=["GET", "POST"])
def get(path, secret=None):
@ -470,7 +626,9 @@ def get(path, secret=None):
response.headers["Content-Length"] = f.size
response.headers["X-Accel-Redirect"] = "/" + str(fpath)
else:
response = send_from_directory(app.config["FHOST_STORAGE_PATH"], f.sha256, mimetype = f.mime)
response = send_from_directory(
app.config["FHOST_STORAGE_PATH"], f.sha256,
mimetype=f.mime)
response.headers["X-Expires"] = f.expiration
return response
@ -488,11 +646,19 @@ def get(path, secret=None):
abort(404)
@app.route("/", methods=["GET", "POST"])
def fhost():
if request.method == "POST":
for flt in RequestFilter.query.all():
if flt.check_request(request):
abort(403, flt.reason)
sf = None
secret = "secret" in request.form
addr = ipaddress.ip_address(request.remote_addr)
if type(addr) is ipaddress.IPv6Address:
addr = addr.ipv4_mapped or addr
if "file" in request.files:
try:
@ -500,7 +666,7 @@ def fhost():
return store_file(
request.files["file"],
int(request.form["expires"]),
request.remote_addr,
addr,
request.user_agent.string,
secret
)
@ -512,14 +678,14 @@ def fhost():
return store_file(
request.files["file"],
None,
request.remote_addr,
addr,
request.user_agent.string,
secret
)
elif "url" in request.form:
return store_url(
request.form["url"],
request.remote_addr,
addr,
request.user_agent.string,
secret
)
@ -530,14 +696,17 @@ def fhost():
else:
return render_template("index.html")
@app.route("/robots.txt")
def robots():
    """Serve a robots.txt that disallows everything for all user agents."""
    rules = (
        "User-agent: *\n"
        "Disallow: /\n"
    )
    return rules
@app.errorhandler(400)
@app.errorhandler(401)
@app.errorhandler(403)
@app.errorhandler(404)
@app.errorhandler(411)
@app.errorhandler(413)
@ -546,20 +715,23 @@ Disallow: /
@app.errorhandler(451)
def ehandler(e):
try:
return render_template(f"{e.code}.html", id=id, request=request), e.code
return render_template(f"{e.code}.html", id=id, request=request,
description=e.description), e.code
except TemplateNotFound:
return "Segmentation fault\n", e.code
@app.cli.command("prune")
def prune():
"""
Clean up expired files
Deletes any files from the filesystem which have hit their expiration time. This
doesn't remove them from the database, only from the filesystem. It's recommended
that server owners run this command regularly, or set it up on a timer.
Deletes any files from the filesystem which have hit their expiration time.
This doesn't remove them from the database, only from the filesystem.
It is recommended that server owners run this command regularly, or set it
up on a timer.
"""
current_time = time.time() * 1000;
current_time = time.time() * 1000
# The path to where uploaded files are stored
storage = Path(app.config["FHOST_STORAGE_PATH"])
@ -573,7 +745,7 @@ def prune():
)
)
files_removed = 0;
files_removed = 0
# For every expired file...
for file in expired_files:
@ -586,31 +758,33 @@ def prune():
# Remove it from the file system
try:
os.remove(file_path)
files_removed += 1;
files_removed += 1
except FileNotFoundError:
pass # If the file was already gone, we're good
pass # If the file was already gone, we're good
except OSError as e:
print(e)
print(
"\n------------------------------------"
"Encountered an error while trying to remove file {file_path}. Double"
"check to make sure the server is configured correctly, permissions are"
"okay, and everything is ship shape, then try again.")
return;
"Encountered an error while trying to remove file {file_path}."
"Make sure the server is configured correctly, permissions "
"are okay, and everything is ship shape, then try again.")
return
# Finally, mark that the file was removed
file.expiration = None;
file.expiration = None
db.session.commit()
print(f"\nDone! {files_removed} file(s) removed")
""" For a file of a given size, determine the largest allowed lifespan of that file
Based on the current app's configuration: Specifically, the MAX_CONTENT_LENGTH, as well
as FHOST_{MIN,MAX}_EXPIRATION.
"""
For a file of a given size, determine the largest allowed lifespan of that file
This lifespan may be shortened by a user's request, but no files should be allowed to
expire at a point after this number.
Based on the current app's configuration:
Specifically, the MAX_CONTENT_LENGTH, as well as FHOST_{MIN,MAX}_EXPIRATION.
This lifespan may be shortened by a user's request, but no files should be
allowed to expire at a point after this number.
Value returned is a duration in milliseconds.
"""
@ -620,11 +794,13 @@ def get_max_lifespan(filesize: int) -> int:
max_size = app.config.get("MAX_CONTENT_LENGTH", 256 * 1024 * 1024)
return min_exp + int((-max_exp + min_exp) * (filesize / max_size - 1) ** 3)
def do_vscan(f):
if f["path"].is_file():
with open(f["path"], "rb") as scanf:
try:
f["result"] = list(app.config["VSCAN_SOCKET"].instream(scanf).values())[0]
res = list(app.config["VSCAN_SOCKET"].instream(scanf).values())
f["result"] = res[0]
except:
f["result"] = ("SCAN FAILED", None)
else:
@ -632,11 +808,12 @@ def do_vscan(f):
return f
@app.cli.command("vscan")
def vscan():
if not app.config["VSCAN_SOCKET"]:
print("""Error: Virus scanning enabled but no connection method specified.
Please set VSCAN_SOCKET.""")
print("Error: Virus scanning enabled but no connection method "
"specified.\nPlease set VSCAN_SOCKET.")
sys.exit(1)
qp = Path(app.config["VSCAN_QUARANTINE_PATH"])
@ -650,9 +827,11 @@ Please set VSCAN_SOCKET.""")
File.last_vscan == None),
File.removed == False)
else:
res = File.query.filter(File.last_vscan == None, File.removed == False)
res = File.query.filter(File.last_vscan == None,
File.removed == False)
work = [{"path" : f.getpath(), "name" : f.getname(), "id" : f.id} for f in res]
work = [{"path": f.getpath(), "name": f.getname(), "id": f.id}
for f in res]
results = []
for i, r in enumerate(p.imap_unordered(do_vscan, work)):
@ -666,9 +845,10 @@ Please set VSCAN_SOCKET.""")
found = True
results.append({
"id" : r["id"],
"last_vscan" : None if r["result"][0] == "SCAN FAILED" else datetime.datetime.now(),
"removed" : found})
"id": r["id"],
"last_vscan": None if r["result"][0] == "SCAN FAILED"
else datetime.datetime.now(),
"removed": found})
db.session.bulk_update_mappings(File, results)
db.session.commit()

View file

@ -139,30 +139,6 @@ FHOST_EXT_OVERRIDE = {
"text/x-diff" : ".diff",
}
# Control which files aren't allowed to be uploaded
#
# Certain kinds of files are never accepted. If the file claims to be one of
# these types of files, or if we look at the contents of the file and it looks
# like one of these filetypes, then we reject the file outright with a 415
# UNSUPPORTED MEDIA EXCEPTION
FHOST_MIME_BLACKLIST = [
"application/x-dosexec",
"application/java-archive",
"application/java-vm"
]
# A list of IP addresses which are blacklisted from uploading files
#
# Can be set to the path of a file with an IP address on each line. The file
# can also include comment lines using a pound sign (#). Paths are resolved
# relative to the instance/ directory.
#
# If this is set to None, then no IP blacklist will be consulted.
FHOST_UPLOAD_BLACKLIST = None
# Enables support for detecting NSFW images
#
# Consult README.md for additional dependencies before setting to True
@ -176,7 +152,7 @@ NSFW_DETECT = False
# are marked as NSFW.
#
# If NSFW_DETECT is set to False, then this has no effect.
NSFW_THRESHOLD = 0.608
NSFW_THRESHOLD = 0.92
# If you want to scan files for viruses using ClamAV, specify the socket used

View file

@ -81,6 +81,7 @@ def run_migrations_online():
finally:
connection.close()
if context.is_offline_mode():
run_migrations_offline()
else:

View file

@ -15,12 +15,8 @@ import sqlalchemy as sa
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('file', sa.Column('mgmt_token', sa.String(), nullable=True))
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('file', 'mgmt_token')
# ### end Alembic commands ###

View file

@ -15,28 +15,22 @@ import sqlalchemy as sa
def upgrade():
### commands auto generated by Alembic - please adjust! ###
op.create_table('URL',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('url', sa.UnicodeText(), nullable=True),
sa.PrimaryKeyConstraint('id'),
sa.UniqueConstraint('url')
)
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('url', sa.UnicodeText(), nullable=True),
sa.PrimaryKeyConstraint('id'),
sa.UniqueConstraint('url'))
op.create_table('file',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('sha256', sa.String(), nullable=True),
sa.Column('ext', sa.UnicodeText(), nullable=True),
sa.Column('mime', sa.UnicodeText(), nullable=True),
sa.Column('addr', sa.UnicodeText(), nullable=True),
sa.Column('removed', sa.Boolean(), nullable=True),
sa.PrimaryKeyConstraint('id'),
sa.UniqueConstraint('sha256')
)
### end Alembic commands ###
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('sha256', sa.String(), nullable=True),
sa.Column('ext', sa.UnicodeText(), nullable=True),
sa.Column('mime', sa.UnicodeText(), nullable=True),
sa.Column('addr', sa.UnicodeText(), nullable=True),
sa.Column('removed', sa.Boolean(), nullable=True),
sa.PrimaryKeyConstraint('id'),
sa.UniqueConstraint('sha256'))
def downgrade():
### commands auto generated by Alembic - please adjust! ###
op.drop_table('file')
op.drop_table('URL')
### end Alembic commands ###

View file

@ -19,6 +19,7 @@ from pathlib import Path
Base = automap_base()
def upgrade():
op.add_column('file', sa.Column('size', sa.BigInteger(), nullable=True))
bind = op.get_bind()
@ -34,8 +35,8 @@ def upgrade():
p = storage / f.sha256
if p.is_file():
updates.append({
"id" : f.id,
"size" : p.stat().st_size
"id": f.id,
"size": p.stat().st_size
})
session.bulk_update_mappings(File, updates)

View file

@ -0,0 +1,79 @@
"""Add request filters
Revision ID: 5cda1743b92d
Revises: dd0766afb7d2
Create Date: 2024-09-27 12:13:16.845981
"""
# revision identifiers, used by Alembic.
revision = '5cda1743b92d'
down_revision = 'dd0766afb7d2'
from alembic import op
import sqlalchemy as sa
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from flask import current_app
import ipaddress
Base = automap_base()
def upgrade():
    """
    Create the request_filter table and import the legacy blacklists.

    Entries from the file named by FHOST_UPLOAD_BLACKLIST become "addr"
    filters, entries from FHOST_MIME_BLACKLIST become "mime" filters.
    Blacklist lines that cannot be represented as a single IP address are
    logged and skipped instead of aborting the migration.
    """
    # Only this function needs it, so keep the import local.
    import re

    op.create_table('request_filter',
                    sa.Column('id', sa.Integer(), nullable=False),
                    sa.Column('type', sa.String(length=20), nullable=False),
                    sa.Column('comment', sa.UnicodeText(), nullable=True),
                    sa.Column('addr', sa.LargeBinary(length=16),
                              nullable=True),
                    sa.Column('net', sa.Text(), nullable=True),
                    sa.Column('regex', sa.UnicodeText(), nullable=True),
                    sa.PrimaryKeyConstraint('id'),
                    sa.UniqueConstraint('addr'))

    with op.batch_alter_table('request_filter', schema=None) as batch_op:
        batch_op.create_index(batch_op.f('ix_request_filter_type'), ['type'],
                              unique=False)

    bind = op.get_bind()
    Base.prepare(autoload_with=bind)
    RequestFilter = Base.classes.request_filter
    session = Session(bind=bind)

    blp = current_app.config.get("FHOST_UPLOAD_BLACKLIST")
    if blp:
        # addr has a UNIQUE constraint, so repeated entries (or a mapped
        # and a plain spelling of the same address) must be added once.
        seen = set()

        with current_app.open_instance_resource(blp, "r") as bl:
            for line in bl:
                line = line.strip()

                # Blank lines and comments carry no address.
                if not line or line.startswith("#"):
                    continue

                if line.endswith(":"):
                    # old implementation uses str.startswith,
                    # which does not translate to networks
                    current_app.logger.warning(
                        f"Ignored address: {line}")
                    continue

                try:
                    addr = ipaddress.ip_address(line)
                except ValueError:
                    # Not a valid IP address; the old string comparison
                    # tolerated such lines, so do not fail on them here.
                    current_app.logger.warning(
                        f"Ignored address: {line}")
                    continue

                # Store IPv4-mapped IPv6 addresses in plain IPv4 form,
                # the same way the IPAddress column type binds them.
                if isinstance(addr, ipaddress.IPv6Address):
                    addr = addr.ipv4_mapped or addr

                packed = addr.packed
                if packed in seen:
                    continue
                seen.add(packed)

                session.add(RequestFilter(type="addr", addr=packed))

    for mime in current_app.config.get("FHOST_MIME_BLACKLIST") or []:
        # The old blacklist held literal MIME types compared for equality.
        # Filters are regular expressions, so escape metacharacters such
        # as "+" and "." and anchor the end to keep an exact match.
        session.add(RequestFilter(type="mime", regex=re.escape(mime) + "$"))

    session.commit()

    w = "Entries in your host and MIME blacklists have been migrated to " \
        "request filters and stored in the database, where possible. " \
        "The corresponding files and config options may now be deleted. " \
        "Note that you may have to manually restore them if you wish to " \
        "revert this with a db downgrade operation."
    current_app.logger.warning(w)
def downgrade():
    """
    Revert this migration by removing the request_filter table.

    The filter rows are dropped together with the table; nothing is
    written back to the legacy blacklist file or config options.
    """
    # Drop the index on "type" first, then the table itself.
    with op.batch_alter_table('request_filter', schema=None) as batch_op:
        batch_op.drop_index(batch_op.f('ix_request_filter_type'))

    op.drop_table('request_filter')

View file

@ -15,12 +15,9 @@ import sqlalchemy as sa
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('file', sa.Column('last_vscan', sa.DateTime(), nullable=True))
# ### end Alembic commands ###
op.add_column('file', sa.Column('last_vscan', sa.DateTime(),
nullable=True))
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('file', 'last_vscan')
# ### end Alembic commands ###

View file

@ -15,12 +15,8 @@ import sqlalchemy as sa
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('file', sa.Column('nsfw_score', sa.Float(), nullable=True))
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('file', 'nsfw_score')
# ### end Alembic commands ###

View file

@ -21,24 +21,29 @@ from sqlalchemy.orm import Session
import os
import time
""" For a file of a given size, determine the largest allowed lifespan of that file
Based on the current app's configuration: Specifically, the MAX_CONTENT_LENGTH, as well
as FHOST_{MIN,MAX}_EXPIRATION.
"""
For a file of a given size, determine the largest allowed lifespan of that file
This lifespan may be shortened by a user's request, but no files should be allowed to
expire at a point after this number.
Based on the current app's configuration:
Specifically, the MAX_CONTENT_LENGTH, as well as FHOST_{MIN,MAX}_EXPIRATION.
This lifespan may be shortened by a user's request, but no files should be
allowed to expire at a point after this number.
Value returned is a duration in milliseconds.
"""
def get_max_lifespan(filesize: int) -> int:
min_exp = current_app.config.get("FHOST_MIN_EXPIRATION", 30 * 24 * 60 * 60 * 1000)
max_exp = current_app.config.get("FHOST_MAX_EXPIRATION", 365 * 24 * 60 * 60 * 1000)
max_size = current_app.config.get("MAX_CONTENT_LENGTH", 256 * 1024 * 1024)
cfg = current_app.config
min_exp = cfg.get("FHOST_MIN_EXPIRATION", 30 * 24 * 60 * 60 * 1000)
max_exp = cfg.get("FHOST_MAX_EXPIRATION", 365 * 24 * 60 * 60 * 1000)
max_size = cfg.get("MAX_CONTENT_LENGTH", 256 * 1024 * 1024)
return min_exp + int((-max_exp + min_exp) * (filesize / max_size - 1) ** 3)
Base = automap_base()
def upgrade():
op.add_column('file', sa.Column('expiration', sa.BigInteger()))
@ -48,14 +53,14 @@ def upgrade():
session = Session(bind=bind)
storage = Path(current_app.config["FHOST_STORAGE_PATH"])
current_time = time.time() * 1000;
current_time = time.time() * 1000
# List of file hashes which have not expired yet
# This could get really big for some servers
try:
unexpired_files = os.listdir(storage)
except FileNotFoundError:
return # There are no currently unexpired files
return # There are no currently unexpired files
# Calculate an expiration date for all existing files
@ -65,7 +70,7 @@ def upgrade():
sa.not_(File.removed)
)
)
updates = [] # We coalesce updates to the database here
updates = [] # We coalesce updates to the database here
# SQLite has a hard limit on the number of variables so we
# need to do this the slow way
@ -74,13 +79,18 @@ def upgrade():
for file in files:
file_path = storage / file.sha256
stat = os.stat(file_path)
max_age = get_max_lifespan(stat.st_size) # How long the file is allowed to live, in ms
file_birth = stat.st_mtime * 1000 # When the file was created, in ms
updates.append({'id': file.id, 'expiration': int(file_birth + max_age)})
# How long the file is allowed to live, in ms
max_age = get_max_lifespan(stat.st_size)
# When the file was created, in ms
file_birth = stat.st_mtime * 1000
updates.append({
'id': file.id,
'expiration': int(file_birth + max_age)})
# Apply coalesced updates
session.bulk_update_mappings(File, updates)
session.commit()
def downgrade():
op.drop_column('file', 'expiration')

View file

@ -0,0 +1,78 @@
"""Change File.addr to IPAddress type
Revision ID: d9a53a28ba54
Revises: 5cda1743b92d
Create Date: 2024-09-27 14:03:06.764764
"""
# revision identifiers, used by Alembic.
revision = 'd9a53a28ba54'
down_revision = '5cda1743b92d'
from alembic import op
import sqlalchemy as sa
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from flask import current_app
import ipaddress
Base = automap_base()
def upgrade():
    """Convert ``file.addr`` to the packed binary form of the address.

    The converted values are written to a temporary ``addr_tmp`` column
    first; afterwards the old ``addr`` column is dropped and ``addr_tmp``
    is renamed to ``addr``.
    """
    with op.batch_alter_table('file', schema=None) as batch_op:
        packed_column = sa.Column('addr_tmp', sa.LargeBinary(16),
                                  nullable=True)
        batch_op.add_column(packed_column)

    connection = op.get_bind()
    # Reflection happens after addr_tmp was added, because the updates
    # below refer to that column on the mapped class.
    Base.prepare(autoload_with=connection)
    File = Base.classes.file
    session = Session(bind=connection)

    # "== None" is intentional: SQLAlchemy renders it as an IS NULL test.
    has_addr = sa.not_(File.addr == None)
    query = sa.select(File).where(has_addr)
    rows = session.scalars(query.execution_options(yield_per=1000))

    updates = []
    for row in rows:
        parsed = ipaddress.ip_address(row.addr)
        if type(parsed) is ipaddress.IPv6Address:
            # Prefer the plain IPv4 address for IPv4-mapped IPv6 values
            parsed = parsed.ipv4_mapped or parsed

        updates.append({"id": row.id, "addr_tmp": parsed.packed})

    # One UPDATE statement, executed with the collected parameter sets
    session.execute(sa.update(File), updates)

    with op.batch_alter_table('file', schema=None) as batch_op:
        batch_op.drop_column('addr')
        batch_op.alter_column('addr_tmp', new_column_name='addr')
def downgrade():
    """Convert ``file.addr`` back to a textual address column.

    Mirrors ``upgrade()``: the compressed string form of every address is
    written to a temporary ``addr_tmp`` column of type ``UnicodeText``,
    which then replaces the ``addr`` column.
    """
    with op.batch_alter_table('file', schema=None) as batch_op:
        text_column = sa.Column('addr_tmp', sa.UnicodeText, nullable=True)
        batch_op.add_column(text_column)

    connection = op.get_bind()
    # Reflection happens after addr_tmp was added, because the updates
    # below refer to that column on the mapped class.
    Base.prepare(autoload_with=connection)
    File = Base.classes.file
    session = Session(bind=connection)

    # "== None" is intentional: SQLAlchemy renders it as an IS NULL test.
    has_addr = sa.not_(File.addr == None)
    query = sa.select(File).where(has_addr)
    rows = session.scalars(query.execution_options(yield_per=1000))

    updates = []
    for row in rows:
        # NOTE(review): addr is presumably the packed form written by
        # upgrade(); ip_address() parses that as well - confirm.
        parsed = ipaddress.ip_address(row.addr)
        if type(parsed) is ipaddress.IPv6Address:
            # Prefer the plain IPv4 address for IPv4-mapped IPv6 values
            parsed = parsed.ipv4_mapped or parsed

        updates.append({"id": row.id, "addr_tmp": parsed.compressed})

    # One UPDATE statement, executed with the collected parameter sets
    session.execute(sa.update(File), updates)

    with op.batch_alter_table('file', schema=None) as batch_op:
        batch_op.drop_column('addr')
        batch_op.alter_column('addr_tmp', new_column_name='addr')

View file

@ -15,16 +15,10 @@ import sqlalchemy as sa