Support user-specified expiration times #72
1 changed files with 12 additions and 12 deletions
|
@ -62,7 +62,7 @@ def upgrade():
|
||||||
# List of file hashes which have not expired yet
|
# List of file hashes which have not expired yet
|
||||||
# This could get really big for some servers
|
# This could get really big for some servers
|
||||||
try:
|
try:
|
||||||
unexpired_files = set(os.listdir(storage))
|
unexpired_files = os.listdir(storage)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
return # There are no currently unexpired files
|
return # There are no currently unexpired files
|
||||||
|
|
||||||
Ember marked this conversation as resolved
Outdated
|
|||||||
|
@ -70,11 +70,11 @@ def upgrade():
|
||||||
files = session.scalars(
|
files = session.scalars(
|
||||||
sa.select(File)
|
sa.select(File)
|
||||||
.where(
|
.where(
|
||||||
sa.not_(File.removed)
|
sa.not_(File.removed),
|
||||||
|
File.sha256.in_(unexpired_files)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
Ember marked this conversation as resolved
Outdated
mia
commented
Instead of doing this here, it’s probably faster to store the modified fields:
and execute a bulk update at the end:
Instead of doing this here, it’s probably faster to store the modified fields:
```python
# assume updates is a list
updates.append({
"id": file.id,
"expiration": int(file_birth + max_age)
})
```
and execute a bulk update at the end:
```python
# we’ll have to attach an SQLAlchemy session
from sqlalchemy.orm.session import Session
# reuse bind from automap example if applicable
session = Session(bind=op.get_bind())
# UpdatedFile if not using the automapped class
session.bulk_update_mappings(File, updates)
session.commit()
```
Ember
commented
Makes sense! I'll implement it when I get out of classes later today.
Ember
commented
Resolved in 55ee374
|
|||||||
for file in files:
|
for file in files:
|
||||||
if file.sha256 in unexpired_files:
|
|
||||||
file_path = storage / file.sha256
|
file_path = storage / file.sha256
|
||||||
stat = os.stat(file_path)
|
stat = os.stat(file_path)
|
||||||
max_age = get_max_lifespan(stat.st_size) # How long the file is allowed to live, in ms
|
max_age = get_max_lifespan(stat.st_size) # How long the file is allowed to live, in ms
|
||||||
|
|
Loading…
Reference in a new issue
You can change this query to:
`File.query.where(sa.not_(File.removed), File.sha256.in_(unexpired_files))`, also omitting the `.all()`. This is much faster on my instance, which has over a million results for the unmodified query. That’s a lot of objects we don’t need!

I deliberately chose not to do that here because I'm worried that the query will become too large, and I wasn't sure whether or not sqlalchemy has provisions built in to handle that case. I've had trouble before with very large `in y` queries when working directly with the sqlite library, and I didn't want to chance it. If you think it's safe though, I'll make the change.

I think that should be safe unless the SQL statement somehow grows larger than a gigabyte (SQLite’s default limit), which seems unlikely.
Resolved in 19d989b