Commit 77c8d023 authored by chris's avatar chris
Browse files

Added file hash caching on the filesystem.

parent 7fa544b7
import os
from optparse import make_option
from django.conf import settings
from django.core.files.storage import default_storage
from django.core.management.base import BaseCommand, CommandError
from django.db.models import FileField, ImageField
from database_files.models import File
from database_files.utils import write_file, is_fresh
from optparse import make_option
class Command(BaseCommand):
option_list = BaseCommand.option_list + (
......@@ -21,24 +16,5 @@ class Command(BaseCommand):
'MEDIA_ROOT.'
def handle(self, *args, **options):
    """
    Writes every stale database-stored file to the local file system.

    All of the work is delegated to File.dump_files(), which walks the
    stored files, compares each one against its local copy and rewrites
    the ones that are out of date.  The command previously carried its own
    copy of that loop in addition to calling dump_files(), so every file
    was checked twice per run.

    args, options -- accepted for the management command interface; not
        used here.
    """
    File.dump_files(verbose=True)
\ No newline at end of file
......@@ -3,6 +3,5 @@ import os
class FileManager(models.Manager):
    """
    Manager that resolves stored file records by their name.
    """

    def get_from_name(self, name):
        """
        Returns the single record whose ``name`` field equals the given
        name, using the manager's own get() lookup.
        """
        record = self.get(name=name)
        return record
\ No newline at end of file
import base64
from django.conf import settings
from django.db import models
from django.utils import timezone
from database_files import utils
from database_files.utils import write_file, is_fresh
from database_files.manager import FileManager
class File(models.Model):
......@@ -62,3 +64,31 @@ class File(models.Model):
self._content_hash = utils.get_text_hash(self.content)
return self._content_hash
@classmethod
def dump_files(cls, debug=True, verbose=False):
    """
    Writes every stale database-stored file to the local file system.

    Walks the (id, name, _content_hash) rows of this model and, for each
    row that is_fresh() reports as not matching the local copy, loads the
    full record and writes its content with write_file(overwrite=True).

    debug -- when true (the default), settings.DEBUG is forced to False
        for the duration of the run and restored afterwards, even if an
        error is raised.
        NOTE(review): presumably this stops Django from accumulating
        per-query debug data during a long run -- confirm.
    verbose -- when true, progress and per-file messages are printed.
    """
    if debug:
        tmp_debug = settings.DEBUG
        settings.DEBUG = False
    try:
        q = cls.objects.only('id', 'name', '_content_hash').values_list('id', 'name', '_content_hash')
        total = q.count()
        # Single-argument print(...) behaves the same whether print is a
        # statement (Python 2) or a function (Python 3).
        if verbose:
            print('Checking %i total files...' % (total,))
        for i, (file_id, name, content_hash) in enumerate(q, 1):
            if verbose and not i % 100:
                print('%i of %i' % (i, total))
            if is_fresh(name=name, content_hash=content_hash):
                continue
            if verbose:
                print('File %i-%s is stale. Writing to local file system...'
                      % (file_id, name))
            # Only now fetch the full row (including content), and do it
            # through cls rather than a hard-coded model class.
            record = cls.objects.get(id=file_id)
            write_file(
                record.name,
                record.content,
                overwrite=True)
    finally:
        if debug:
            settings.DEBUG = tmp_debug
\ No newline at end of file
......@@ -7,7 +7,7 @@ from django.core.files.storage import FileSystemStorage
from django.core.urlresolvers import reverse
from database_files import models
from database_files.utils import write_file
from database_files import utils
class DatabaseStorage(FileSystemStorage):
......@@ -67,7 +67,7 @@ class DatabaseStorage(FileSystemStorage):
)
# Automatically write the change to the local file system.
if getattr(settings, 'DATABASE_FILES_FS_AUTO_WRITE', True):
write_file(name, content, overwrite=True)
utils.write_file(name, content, overwrite=True)
#TODO:add callback to handle custom save behavior?
return self._generate_name(name, f.pk)
......@@ -86,6 +86,9 @@ class DatabaseStorage(FileSystemStorage):
"""
try:
models.File.objects.get_from_name(name).delete()
hash_fn = utils.get_hash_fn(name)
if os.path.isfile(hash_fn):
os.remove(hash_fn)
except models.File.DoesNotExist:
pass
return super(DatabaseStorage, self).delete(name)
......
......@@ -13,6 +13,7 @@ from database_files import utils
DIR = os.path.abspath(os.path.split(__file__)[0])
class DatabaseFilesTestCase(TestCase):
def test_adding_file(self):
# Create default thing storing reference to file
......@@ -67,6 +68,8 @@ class DatabaseFilesTestCase(TestCase):
t.upload.delete()
self.assertEqual(File.objects.count(), 1)
File.dump_files()
# Confirm that when we delete a file from the database, we also delete it
# from the filesystem.
self.assertEqual(default_storage.exists('i/special/test.txt'), True)
......@@ -75,6 +78,7 @@ class DatabaseFilesTestCase(TestCase):
self.assertEqual(os.path.isfile(fqfn), False)
def test_hash(self):
verbose = 0
# Create test file.
media_dir = os.path.join(DIR, 'media/i/special')
......@@ -86,14 +90,18 @@ class DatabaseFilesTestCase(TestCase):
hashes = set()
h = utils.get_text_hash(image_content)
hashes.add(h); print h
hashes.add(h)
if verbose: print h
h = utils.get_file_hash(fqfn)
hashes.add(h); print h
hashes.add(h)
if verbose: print h
h = utils.get_text_hash(open(fqfn, 'rb').read())
hashes.add(h); print h
hashes.add(h)
if verbose: print h
h = utils.get_text_hash(open(fqfn, 'r').read())
hashes.add(h); print h
print 'Hashes:', len(hashes)
hashes.add(h)
if verbose: print h
#print 'Hashes:', len(hashes)
# Create test file.
self.assertEqual(len(hashes), 1)
......@@ -103,14 +111,18 @@ class DatabaseFilesTestCase(TestCase):
hashes = set()
h = utils.get_text_hash(image_content)
hashes.add(h); print h
hashes.add(h)
if verbose: print h
h = utils.get_file_hash(fqfn)
hashes.add(h); print h
hashes.add(h)
if verbose: print h
h = utils.get_text_hash(open(fqfn, 'rb').read())
hashes.add(h); print h
hashes.add(h)
if verbose: print h
h = utils.get_text_hash(open(fqfn, 'r').read())
hashes.add(h); print h
print 'Hashes:', len(hashes)
hashes.add(h)
if verbose: print h
#print 'Hashes:', len(hashes)
self.assertEqual(len(hashes), 1)
......
......@@ -8,6 +8,7 @@ from django.conf import settings
# Module defaults.  Each value is read from the Django settings object and
# falls back to the literal given here when the setting is not defined.
# NOTE(review): the three encoding-related values are not used in the visible
# part of this module; presumably they control how text content is encoded
# elsewhere -- confirm against their callers.
DEFAULT_ENFORCE_ENCODING = getattr(settings, 'DB_FILES_DEFAULT_ENFORCE_ENCODING', True)
DEFAULT_ENCODING = getattr(settings, 'DB_FILES_DEFAULT_ENCODING', 'ascii')
DEFAULT_ERROR_METHOD = getattr(settings, 'DB_FILES_DEFAULT_ERROR_METHOD', 'ignore')
# %-format template applied to a file's base name to build the name of the
# file that caches its hash (see get_hash_fn).
DEFAULT_HASH_FN_TEMPLATE = getattr(settings, 'DB_FILES_DEFAULT_HASH_FN_TEMPLATE', '%s.hash')
def is_fresh(name, content_hash):
"""
......@@ -16,6 +17,13 @@ def is_fresh(name, content_hash):
"""
if not content_hash:
return False
# Check for cached hash file.
hash_fn = get_hash_fn(name)
if os.path.isfile(hash_fn):
return open(hash_fn).read().strip() == content_hash
# Otherwise, calculate the hash of the local file.
fqfn = os.path.join(settings.MEDIA_ROOT, name)
fqfn = os.path.normpath(fqfn)
if not os.path.isfile(fqfn):
......@@ -23,6 +31,20 @@ def is_fresh(name, content_hash):
local_content_hash = get_file_hash(fqfn)
return local_content_hash == content_hash
def get_hash_fn(name):
    """
    Returns the filename for the hash file.

    The given name is joined onto settings.MEDIA_ROOT and normalised; the
    hash file sits in the same directory, with its name produced by
    applying DEFAULT_HASH_FN_TEMPLATE to the final path component.

    As a side effect, the directory that will hold the hash file is
    created if it does not exist yet.

    name -- file name relative to MEDIA_ROOT.
    Raises OSError if the directory is missing and cannot be created.
    """
    fqfn = os.path.normpath(os.path.join(settings.MEDIA_ROOT, name))
    dirs, fn = os.path.split(fqfn)
    if not os.path.isdir(dirs):
        try:
            os.makedirs(dirs)
        except OSError:
            # The directory may have been created by someone else between
            # the isdir() check and makedirs(); that is not an error.  Any
            # other failure leaves the directory missing and is re-raised.
            if not os.path.isdir(dirs):
                raise
    return os.path.join(dirs, DEFAULT_HASH_FN_TEMPLATE % fn)
def write_file(name, content, overwrite=False):
"""
Writes the given content to the relative filename under the MEDIA_ROOT.
......@@ -36,6 +58,11 @@ def write_file(name, content, overwrite=False):
os.makedirs(dirs)
open(fqfn, 'wb').write(content)
# Cache hash.
hash = get_file_hash(fqfn)
hash_fn = get_hash_fn(name)
open(hash_fn, 'wb').write(hash)
# Set ownership and permissions.
uname = getattr(settings, 'DATABASE_FILES_USER', None)
gname = getattr(settings, 'DATABASE_FILES_GROUP', None)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment