Commit a5dae1ca authored by chris's avatar chris
Browse files

Improved performance of database_files_dump by only exporting
content from the database if there's a difference in file hashes.
parent c1e05c7a
VERSION = (0, 1, 5)
VERSION = (0, 1, 6)
__version__ = '.'.join(map(str, VERSION))
\ No newline at end of file
......@@ -6,15 +6,15 @@ from django.core.management.base import BaseCommand, CommandError
from django.db.models import FileField, ImageField
from database_files.models import File
from database_files.utils import write_file
from database_files.utils import write_file, is_fresh
from optparse import make_option
class Command(BaseCommand):
option_list = BaseCommand.option_list + (
make_option('-w', '--overwrite', action='store_true',
dest='overwrite', default=False,
help='If given, overwrites any existing files.'),
# make_option('-w', '--overwrite', action='store_true',
# dest='overwrite', default=False,
# help='If given, overwrites any existing files.'),
)
help = 'Dumps all files in the database referenced by FileFields ' + \
'or ImageFields onto the filesystem in the directory specified by ' + \
......@@ -24,17 +24,21 @@ class Command(BaseCommand):
tmp_debug = settings.DEBUG
settings.DEBUG = False
try:
q = File.objects.all()
q = File.objects.all().values_list('id', 'name', '_content_hash')
total = q.count()
i = 0
for file in q:
for (file_id, name, content_hash) in q:
i += 1
if not i % 100:
print '%i of %i' % (i, total)
write_file(
file.name,
file.content,
options['overwrite'])
if not is_fresh(name=name, content_hash=content_hash):
print 'File %i-%s is stale. Writing to local file system...' \
% (file_id, name)
file = File.objects.get(id=file_id)
write_file(
file.name,
file.content,
overwrite=True)
finally:
settings.DEBUG = tmp_debug
\ No newline at end of file
# -*- coding: utf-8 -*-
import datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
from django.utils import timezone
class Migration(SchemaMigration):
    """
    Schema migration that adds the ``created_datetime`` and ``_content_hash``
    fields to the ``database_files_file`` table.
    """

    def forwards(self, orm):
        """
        Adds the two new columns to ``database_files_file``.
        """
        # Adding field 'File.created_datetime'
        db.add_column('database_files_file', 'created_datetime',
                      self.gf('django.db.models.fields.DateTimeField')(default=timezone.now, db_index=True),
                      keep_default=False)
        # Adding field 'File._content_hash'
        # The model attribute is '_content_hash' but the database column is
        # named 'content_hash' (see db_column below).
        db.add_column('database_files_file', '_content_hash',
                      self.gf('django.db.models.fields.CharField')(max_length=128, null=True, db_column='content_hash', blank=True),
                      keep_default=False)

    def backwards(self, orm):
        """
        Drops the two columns added by ``forwards``.
        """
        # Deleting field 'File.created_datetime'
        db.delete_column('database_files_file', 'created_datetime')
        # Deleting field 'File._content_hash'
        # Uses the real column name, 'content_hash', not the attribute name.
        db.delete_column('database_files_file', 'content_hash')

    # Description of the File model's fields as of this migration.
    # NOTE(review): presumably consumed by South to build the ``orm``
    # argument passed to forwards/backwards -- confirm against South docs.
    models = {
        'database_files.file': {
            'Meta': {'object_name': 'File'},
            '_content': ('django.db.models.fields.TextField', [], {'db_column': "'content'"}),
            '_content_hash': ('django.db.models.fields.CharField', [], {'max_length': '128', 'null': 'True', 'db_column': "'content_hash'", 'blank': 'True'}),
            'created_datetime': ('django.db.models.fields.DateTimeField', [], {'default': 'timezone.now', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255', 'db_index': 'True'}),
            'size': ('django.db.models.fields.PositiveIntegerField', [], {})
        }
    }

    # NOTE(review): presumably the apps whose models are fully described in
    # ``models`` above -- confirm against South docs.
    complete_apps = ['database_files']
\ No newline at end of file
# -*- coding: utf-8 -*-
import datetime
from south.db import db
from south.v2 import DataMigration
from django.db import models
import base64
from database_files import utils
class Migration(DataMigration):
    """
    Data migration that fills in ``_content_hash`` for every existing File.
    """

    def forwards(self, orm):
        """
        Calculates and stores the content hash of each File record.
        """
        # Use the model as provided by the migration's ``orm`` object rather
        # than importing the current model class.
        File = orm['database_files.File']
        q = File.objects.all()
        for f in q:
            # ``_content`` holds base64 text, so it is decoded before hashing.
            f._content_hash = utils.get_text_hash_0004(base64.b64decode(f._content))
            f.save()

    def backwards(self, orm):
        """
        Does nothing; the hashes written by ``forwards`` are left in place.
        """

    # Description of the File model's fields as of this migration.
    # NOTE(review): presumably consumed by South to build the ``orm``
    # argument used above -- confirm against South docs.
    models = {
        'database_files.file': {
            'Meta': {'object_name': 'File'},
            '_content': ('django.db.models.fields.TextField', [], {'db_column': "'content'"}),
            '_content_hash': ('django.db.models.fields.CharField', [], {'max_length': '128', 'null': 'True', 'db_column': "'content_hash'", 'blank': 'True'}),
            'created_datetime': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255', 'db_index': 'True'}),
            'size': ('django.db.models.fields.PositiveIntegerField', [], {})
        }
    }

    # NOTE(review): presumably the apps whose models are fully described in
    # ``models`` above -- confirm against South docs.
    complete_apps = ['database_files']
    # NOTE(review): South DataMigration flag; its exact meaning is not
    # evident from this file -- confirm against South docs.
    symmetrical = True
# -*- coding: utf-8 -*-
import datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
class Migration(SchemaMigration):
    """
    Schema migration that indexes the ``content_hash`` and ``size`` columns
    of the ``database_files_file`` table.
    """

    def forwards(self, orm):
        """
        Creates the two indexes.
        """
        # Adding index on 'File', fields ['_content_hash']
        # The index is created on the real column name, 'content_hash'.
        db.create_index('database_files_file', ['content_hash'])
        # Adding index on 'File', fields ['size']
        db.create_index('database_files_file', ['size'])

    def backwards(self, orm):
        """
        Removes the two indexes, in the reverse order of ``forwards``.
        """
        # Removing index on 'File', fields ['size']
        db.delete_index('database_files_file', ['size'])
        # Removing index on 'File', fields ['_content_hash']
        db.delete_index('database_files_file', ['content_hash'])

    # Description of the File model's fields as of this migration; both
    # '_content_hash' and 'size' now carry db_index.
    # NOTE(review): presumably consumed by South to build the ``orm``
    # argument -- confirm against South docs.
    models = {
        'database_files.file': {
            'Meta': {'object_name': 'File'},
            '_content': ('django.db.models.fields.TextField', [], {'db_column': "'content'"}),
            '_content_hash': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '128', 'null': 'True', 'db_column': "'content_hash'", 'blank': 'True'}),
            'created_datetime': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255', 'db_index': 'True'}),
            'size': ('django.db.models.fields.PositiveIntegerField', [], {'db_index': 'True'})
        }
    }

    # NOTE(review): presumably the apps whose models are fully described in
    # ``models`` above -- confirm against South docs.
    complete_apps = ['database_files']
\ No newline at end of file
import base64
from django.db import models
from django.utils import timezone
from database_files import utils
from database_files.manager import FileManager
class File(models.Model):
......@@ -16,11 +18,36 @@ class File(models.Model):
db_index=True)
size = models.PositiveIntegerField(
db_index=True,
blank=False,
null=False)
_content = models.TextField(db_column='content')
created_datetime = models.DateTimeField(
db_index=True,
default=timezone.now,
verbose_name="Created datetime")
_content_hash = models.CharField(
db_column='content_hash',
db_index=True,
max_length=128,
blank=True, null=True)
def save(self, *args, **kwargs):
    """
    Saves the file, keeping the stored content hash in step with the content.

    When the instance already has a primary key, the content currently held
    in the database is compared with the in-memory content; if they differ
    the cached hash is discarded. A missing hash is then recalculated before
    the call is passed on to the parent ``save``.

    Accepts and returns whatever the parent ``save`` accepts and returns.
    """
    # Check for and clear old content hash.
    if self.id:
        # Fetch only the stored content column, and tolerate a primary key
        # that was assigned by hand and has no row yet; ``objects.get``
        # would raise ``DoesNotExist`` in that case.
        stored = list(
            File.objects.filter(id=self.id)
            .values_list('_content', flat=True)[:1])
        if stored and stored[0] != self._content:
            self._content_hash = None
    # Recalculate new content hash. Reading the property fills in
    # ``_content_hash`` as a side effect when it is empty.
    self.content_hash
    return super(File, self).save(*args, **kwargs)
@property
def content(self):
    """
    Returns the file's raw content, decoded from the base64 text held in
    ``_content``.
    """
    encoded = self._content
    return base64.b64decode(encoded)
......@@ -28,4 +55,10 @@ class File(models.Model):
@content.setter
def content(self, v):
    """
    Stores the given raw content as base64 in ``_content``.

    If the encoded value differs from what is currently held, the cached
    ``_content_hash`` is cleared so that a hash calculated for earlier
    content cannot be kept (and later saved) for the new content.
    """
    encoded = base64.b64encode(v)
    # getattr guards against the attribute not having been set yet.
    if encoded != getattr(self, '_content', None):
        self._content_hash = None
    self._content = encoded
\ No newline at end of file
@property
def content_hash(self):
    """
    Returns the hash of the file's content, calculating and caching it in
    ``_content_hash`` on first use when there is content to hash.
    """
    cached = self._content_hash
    if cached:
        return cached
    if self._content:
        self._content_hash = utils.get_text_hash(self.content)
    return self._content_hash
\ No newline at end of file
#from grp import getgrnam
#from pwd import getpwnam
import os
import hashlib
from django.conf import settings
def is_fresh(name, content_hash):
    """
    Tells whether the local copy of a file is up to date.

    Returns true only when a hash is given, the file exists under the
    MEDIA_ROOT, and the hash of the local file equals the given hash.
    Returns false in every other case.
    """
    if content_hash:
        local_path = os.path.normpath(os.path.join(settings.MEDIA_ROOT, name))
        if os.path.isfile(local_path):
            return get_file_hash(local_path) == content_hash
    return False
def write_file(name, content, overwrite=False):
"""
Writes the given content to the relative filename under the MEDIA_ROOT.
......@@ -28,4 +43,29 @@ def write_file(name, content, overwrite=False):
# Set permissions.
perms = getattr(settings, 'DATABASE_FILES_PERMS', None)
if perms:
os.system('chmod -R %s "%s"' % (perms, dirs))
\ No newline at end of file
os.system('chmod -R %s "%s"' % (perms, dirs))
def get_file_hash(fin):
    """
    Iteratively builds a file hash, one line at a time, so that no more than
    a single line of the file is held in memory.

    ``fin`` is either a filename or an already-open file-like object. A
    filename is opened in binary mode, so the bytes hashed are exactly the
    bytes on disk, and is closed again before returning. A file-like object
    passed in by the caller is read from but left open.

    Each line is normalised the same way as in ``get_text_hash`` (bytes are
    decoded as UTF-8 with replacement of invalid sequences, then re-encoded),
    so the result is comparable with the hash of the same content as text.

    Returns the SHA-512 hex digest.
    """
    # (bytes, type(u'')) is (str, unicode) on Python 2 and (bytes, str) on
    # Python 3, i.e. "any kind of string" on both.
    opened_here = isinstance(fin, (bytes, type(u'')))
    if opened_here:
        fin = open(fin, 'rb')
    try:
        h = hashlib.sha512()
        # Iterating the file object reads lazily, unlike readlines().
        for line in fin:
            if isinstance(line, bytes):
                line = line.decode('utf-8', 'replace')
            h.update(line.encode('utf-8', 'replace'))
        return h.hexdigest()
    finally:
        # Only close what this function opened.
        if opened_here:
            fin.close()
def get_text_hash(text):
    """
    Returns the hash of the given text.

    ``text`` may be a byte string or a unicode string. Bytes are decoded as
    UTF-8, with invalid sequences replaced, and the result is re-encoded as
    UTF-8 before hashing, so equivalent byte and unicode inputs give the
    same digest. Works on both Python 2 and Python 3.

    Returns the SHA-512 hex digest.
    """
    if isinstance(text, (bytes, bytearray)):
        text = bytes(text).decode('utf-8', 'replace')
    h = hashlib.sha512()
    h.update(text.encode('utf-8', 'replace'))
    return h.hexdigest()


# Name referenced by the data migration that fills in content hashes.
get_text_hash_0004 = get_text_hash
......@@ -4,6 +4,16 @@ import os
from distutils.core import setup, Command
import database_files
def get_reqs(reqs=None):
    """
    Returns the list of requirements, adding ``argparse`` when the running
    interpreter does not provide it.

    ``reqs`` is an optional list to extend; it is appended to in place and
    returned. When omitted, a new list is created on every call, so the
    result of one call is never shared with another.
    """
    if reqs is None:
        reqs = []
    # optparse is included with Python <= 2.7, but has been deprecated in favor
    # of argparse.  We try to import argparse and if we can't, then we'll add
    # it to the requirements
    try:
        import argparse
    except ImportError:
        reqs.append("argparse>=1.1")
    return reqs
class TestCommand(Command):
description = "Runs unittests."
user_options = []
......@@ -35,6 +45,7 @@ setup(
'Operating System :: OS Independent',
'Programming Language :: Python',
],
requires = ["Django (>=1.4)",],
cmdclass={
'test': TestCommand,
},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment