Commit 77c8d023 authored by chris
Browse files

Added file hash caching on the filesystem.

parent 7fa544b7
import os import os
from optparse import make_option
from django.conf import settings
from django.core.files.storage import default_storage
from django.core.management.base import BaseCommand, CommandError from django.core.management.base import BaseCommand, CommandError
from django.db.models import FileField, ImageField
from database_files.models import File from database_files.models import File
from database_files.utils import write_file, is_fresh
from optparse import make_option
class Command(BaseCommand): class Command(BaseCommand):
option_list = BaseCommand.option_list + ( option_list = BaseCommand.option_list + (
...@@ -21,24 +16,5 @@ class Command(BaseCommand): ...@@ -21,24 +16,5 @@ class Command(BaseCommand):
'MEDIA_ROOT.' 'MEDIA_ROOT.'
def handle(self, *args, **options): def handle(self, *args, **options):
tmp_debug = settings.DEBUG File.dump_files(verbose=True)
settings.DEBUG = False
try: \ No newline at end of file
q = File.objects.all().values_list('id', 'name', '_content_hash')
total = q.count()
i = 0
for (file_id, name, content_hash) in q:
i += 1
if not i % 100:
print '%i of %i' % (i, total)
if not is_fresh(name=name, content_hash=content_hash):
print 'File %i-%s is stale. Writing to local file system...' \
% (file_id, name)
file = File.objects.get(id=file_id)
write_file(
file.name,
file.content,
overwrite=True)
finally:
settings.DEBUG = tmp_debug
\ No newline at end of file
...@@ -3,6 +3,5 @@ import os ...@@ -3,6 +3,5 @@ import os
class FileManager(models.Manager): class FileManager(models.Manager):
def get_from_name(self, name): def get_from_name(self, name):
# print 'name:',name return self.get(name=name)
# return self.get(pk=os.path.splitext(os.path.split(name)[1])[0])
return self.get(name=name) \ No newline at end of file
\ No newline at end of file
import base64 import base64
from django.conf import settings
from django.db import models from django.db import models
from django.utils import timezone from django.utils import timezone
from database_files import utils from database_files import utils
from database_files.utils import write_file, is_fresh
from database_files.manager import FileManager from database_files.manager import FileManager
class File(models.Model): class File(models.Model):
...@@ -61,4 +63,32 @@ class File(models.Model): ...@@ -61,4 +63,32 @@ class File(models.Model):
if not self._content_hash and self._content: if not self._content_hash and self._content:
self._content_hash = utils.get_text_hash(self.content) self._content_hash = utils.get_text_hash(self.content)
return self._content_hash return self._content_hash
\ No newline at end of file @classmethod
def dump_files(cls, debug=True, verbose=False):
    """
    Writes every stale database-stored file out to the local file system.

    Each record's id, name and content hash are listed first; the full
    record (including its content) is only loaded for entries that
    ``is_fresh`` reports as not fresh, and those are passed to
    ``write_file`` with ``overwrite=True``.

    debug -- when true (the default), ``settings.DEBUG`` is forced to
        False while the dump runs and restored afterwards, even if an
        error occurs.
        NOTE(review): presumably this avoids Django's per-query
        bookkeeping under DEBUG during a long loop -- confirm.
    verbose -- when true, prints the total count, a progress line every
        100 records, and a line for each stale file being rewritten.
    """
    if debug:
        tmp_debug = settings.DEBUG
        settings.DEBUG = False
    try:
        q = cls.objects.only('id', 'name', '_content_hash').values_list(
            'id', 'name', '_content_hash')
        total = q.count()
        # Single-argument print(...) emits identical text on Python 2 and 3.
        if verbose:
            print('Checking %i total files...' % (total,))
        # Count from 1 so the progress line reads "100 of N", "200 of N", ...
        for i, (file_id, name, content_hash) in enumerate(q, 1):
            if verbose and not i % 100:
                print('%i of %i' % (i, total))
            if is_fresh(name=name, content_hash=content_hash):
                continue
            if verbose:
                print('File %i-%s is stale. Writing to local file system...'
                      % (file_id, name))
            # Re-fetch through ``cls`` (not a hard-coded model name) so the
            # lookup uses the same manager as the listing query above, and
            # avoid shadowing the ``file`` builtin.
            stale_file = cls.objects.get(id=file_id)
            write_file(
                stale_file.name,
                stale_file.content,
                overwrite=True)
    finally:
        if debug:
            settings.DEBUG = tmp_debug
\ No newline at end of file
...@@ -7,7 +7,7 @@ from django.core.files.storage import FileSystemStorage ...@@ -7,7 +7,7 @@ from django.core.files.storage import FileSystemStorage
from django.core.urlresolvers import reverse from django.core.urlresolvers import reverse
from database_files import models from database_files import models
from database_files.utils import write_file from database_files import utils
class DatabaseStorage(FileSystemStorage): class DatabaseStorage(FileSystemStorage):
...@@ -67,7 +67,7 @@ class DatabaseStorage(FileSystemStorage): ...@@ -67,7 +67,7 @@ class DatabaseStorage(FileSystemStorage):
) )
# Automatically write the change to the local file system. # Automatically write the change to the local file system.
if getattr(settings, 'DATABASE_FILES_FS_AUTO_WRITE', True): if getattr(settings, 'DATABASE_FILES_FS_AUTO_WRITE', True):
write_file(name, content, overwrite=True) utils.write_file(name, content, overwrite=True)
#TODO:add callback to handle custom save behavior? #TODO:add callback to handle custom save behavior?
return self._generate_name(name, f.pk) return self._generate_name(name, f.pk)
...@@ -86,6 +86,9 @@ class DatabaseStorage(FileSystemStorage): ...@@ -86,6 +86,9 @@ class DatabaseStorage(FileSystemStorage):
""" """
try: try:
models.File.objects.get_from_name(name).delete() models.File.objects.get_from_name(name).delete()
hash_fn = utils.get_hash_fn(name)
if os.path.isfile(hash_fn):
os.remove(hash_fn)
except models.File.DoesNotExist: except models.File.DoesNotExist:
pass pass
return super(DatabaseStorage, self).delete(name) return super(DatabaseStorage, self).delete(name)
......
...@@ -13,6 +13,7 @@ from database_files import utils ...@@ -13,6 +13,7 @@ from database_files import utils
DIR = os.path.abspath(os.path.split(__file__)[0]) DIR = os.path.abspath(os.path.split(__file__)[0])
class DatabaseFilesTestCase(TestCase): class DatabaseFilesTestCase(TestCase):
def test_adding_file(self): def test_adding_file(self):
# Create default thing storing reference to file # Create default thing storing reference to file
...@@ -67,6 +68,8 @@ class DatabaseFilesTestCase(TestCase): ...@@ -67,6 +68,8 @@ class DatabaseFilesTestCase(TestCase):
t.upload.delete() t.upload.delete()
self.assertEqual(File.objects.count(), 1) self.assertEqual(File.objects.count(), 1)
File.dump_files()
# Confirm when delete a file from the database, we also delete it from # Confirm when delete a file from the database, we also delete it from
# the filesystem. # the filesystem.
self.assertEqual(default_storage.exists('i/special/test.txt'), True) self.assertEqual(default_storage.exists('i/special/test.txt'), True)
...@@ -75,6 +78,7 @@ class DatabaseFilesTestCase(TestCase): ...@@ -75,6 +78,7 @@ class DatabaseFilesTestCase(TestCase):
self.assertEqual(os.path.isfile(fqfn), False) self.assertEqual(os.path.isfile(fqfn), False)
def test_hash(self): def test_hash(self):
verbose = 0
# Create test file. # Create test file.
media_dir = os.path.join(DIR, 'media/i/special') media_dir = os.path.join(DIR, 'media/i/special')
...@@ -86,14 +90,18 @@ class DatabaseFilesTestCase(TestCase): ...@@ -86,14 +90,18 @@ class DatabaseFilesTestCase(TestCase):
hashes = set() hashes = set()
h = utils.get_text_hash(image_content) h = utils.get_text_hash(image_content)
hashes.add(h); print h hashes.add(h)
if verbose: print h
h = utils.get_file_hash(fqfn) h = utils.get_file_hash(fqfn)
hashes.add(h); print h hashes.add(h)
if verbose: print h
h = utils.get_text_hash(open(fqfn, 'rb').read()) h = utils.get_text_hash(open(fqfn, 'rb').read())
hashes.add(h); print h hashes.add(h)
if verbose: print h
h = utils.get_text_hash(open(fqfn, 'r').read()) h = utils.get_text_hash(open(fqfn, 'r').read())
hashes.add(h); print h hashes.add(h)
print 'Hashes:', len(hashes) if verbose: print h
#print 'Hashes:', len(hashes)
# Create test file. # Create test file.
self.assertEqual(len(hashes), 1) self.assertEqual(len(hashes), 1)
...@@ -103,14 +111,18 @@ class DatabaseFilesTestCase(TestCase): ...@@ -103,14 +111,18 @@ class DatabaseFilesTestCase(TestCase):
hashes = set() hashes = set()
h = utils.get_text_hash(image_content) h = utils.get_text_hash(image_content)
hashes.add(h); print h hashes.add(h)
if verbose: print h
h = utils.get_file_hash(fqfn) h = utils.get_file_hash(fqfn)
hashes.add(h); print h hashes.add(h)
if verbose: print h
h = utils.get_text_hash(open(fqfn, 'rb').read()) h = utils.get_text_hash(open(fqfn, 'rb').read())
hashes.add(h); print h hashes.add(h)
if verbose: print h
h = utils.get_text_hash(open(fqfn, 'r').read()) h = utils.get_text_hash(open(fqfn, 'r').read())
hashes.add(h); print h hashes.add(h)
print 'Hashes:', len(hashes) if verbose: print h
#print 'Hashes:', len(hashes)
self.assertEqual(len(hashes), 1) self.assertEqual(len(hashes), 1)
......
...@@ -8,6 +8,7 @@ from django.conf import settings ...@@ -8,6 +8,7 @@ from django.conf import settings
DEFAULT_ENFORCE_ENCODING = getattr(settings, 'DB_FILES_DEFAULT_ENFORCE_ENCODING', True) DEFAULT_ENFORCE_ENCODING = getattr(settings, 'DB_FILES_DEFAULT_ENFORCE_ENCODING', True)
DEFAULT_ENCODING = getattr(settings, 'DB_FILES_DEFAULT_ENCODING', 'ascii') DEFAULT_ENCODING = getattr(settings, 'DB_FILES_DEFAULT_ENCODING', 'ascii')
DEFAULT_ERROR_METHOD = getattr(settings, 'DB_FILES_DEFAULT_ERROR_METHOD', 'ignore') DEFAULT_ERROR_METHOD = getattr(settings, 'DB_FILES_DEFAULT_ERROR_METHOD', 'ignore')
DEFAULT_HASH_FN_TEMPLATE = getattr(settings, 'DB_FILES_DEFAULT_HASH_FN_TEMPLATE', '%s.hash')
def is_fresh(name, content_hash): def is_fresh(name, content_hash):
""" """
...@@ -16,6 +17,13 @@ def is_fresh(name, content_hash): ...@@ -16,6 +17,13 @@ def is_fresh(name, content_hash):
""" """
if not content_hash: if not content_hash:
return False return False
# Check for cached hash file.
hash_fn = get_hash_fn(name)
if os.path.isfile(hash_fn):
return open(hash_fn).read().strip() == content_hash
# Otherwise, calculate the hash of the local file.
fqfn = os.path.join(settings.MEDIA_ROOT, name) fqfn = os.path.join(settings.MEDIA_ROOT, name)
fqfn = os.path.normpath(fqfn) fqfn = os.path.normpath(fqfn)
if not os.path.isfile(fqfn): if not os.path.isfile(fqfn):
...@@ -23,6 +31,20 @@ def is_fresh(name, content_hash): ...@@ -23,6 +31,20 @@ def is_fresh(name, content_hash):
local_content_hash = get_file_hash(fqfn) local_content_hash = get_file_hash(fqfn)
return local_content_hash == content_hash return local_content_hash == content_hash
def get_hash_fn(name):
    """
    Returns the path of the hash cache file belonging to ``name``.

    ``name`` is resolved relative to ``settings.MEDIA_ROOT`` and
    normalised; the hash file sits in the same directory, and its file
    name is produced by applying ``DEFAULT_HASH_FN_TEMPLATE`` to the
    media file's base name.

    Side effect: the containing directory is created when it does not
    exist yet.
    """
    media_path = os.path.normpath(os.path.join(settings.MEDIA_ROOT, name))
    parent_dir, base_name = os.path.split(media_path)
    # Make sure the target directory is present before handing back a
    # path inside it.
    if not os.path.isdir(parent_dir):
        os.makedirs(parent_dir)
    return os.path.join(parent_dir, DEFAULT_HASH_FN_TEMPLATE % base_name)
def write_file(name, content, overwrite=False): def write_file(name, content, overwrite=False):
""" """
Writes the given content to the relative filename under the MEDIA_ROOT. Writes the given content to the relative filename under the MEDIA_ROOT.
...@@ -36,6 +58,11 @@ def write_file(name, content, overwrite=False): ...@@ -36,6 +58,11 @@ def write_file(name, content, overwrite=False):
os.makedirs(dirs) os.makedirs(dirs)
open(fqfn, 'wb').write(content) open(fqfn, 'wb').write(content)
# Cache hash.
hash = get_file_hash(fqfn)
hash_fn = get_hash_fn(name)
open(hash_fn, 'wb').write(hash)
# Set ownership and permissions. # Set ownership and permissions.
uname = getattr(settings, 'DATABASE_FILES_USER', None) uname = getattr(settings, 'DATABASE_FILES_USER', None)
gname = getattr(settings, 'DATABASE_FILES_GROUP', None) gname = getattr(settings, 'DATABASE_FILES_GROUP', None)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment