Ticket #6729: 0001-bug-6729-hash-and-check-for-dupes-before-saving-do.patch

File 0001-bug-6729-hash-and-check-for-dupes-before-saving-do.patch, 2.7 kB (added by homunq, 6 years ago)
  • readactivity.py

    From 2c5dcb163c3f4704d27637d7b440efa1f8f0888d Mon Sep 17 00:00:00 2001
    From: Jameson Quinn <jameson.quinn@gmail.com>
    Date: Thu, 14 Aug 2008 13:38:25 -0600
    Subject: [PATCH] bug #6729: hash and check for dupes before saving downloaded pdfs
    
    ---
     readactivity.py |   27 ++++++++++++++++++---------
     1 files changed, 18 insertions(+), 9 deletions(-)
    
    diff --git a/readactivity.py b/readactivity.py
    index 38f3266..67fe1f6 100644
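    a b  (line-number columns: a = old readactivity.py, b = new readactivity.py; rows with a single number were removed or added)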
    1515# along with this program; if not, write to the Free Software 
    1616# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA 
    1717 
     18from __future__ import with_statement 
    1819import logging 
    1920import os 
    2021import tempfile 
     
    244245 
    245246        self.metadata['Read_search'] = self._edit_toolbar._search_entry.props.text 
    246247 
     248    def _revalidate_jobject(self, jobject): 
     249        return datastore.get(jobject.object_id) 
     250         
    247251    def _ensure_in_datastore(self, tempfile): 
    248         import hashlib,json 
     252        import hashlib 
    249253        filehash = hashlib.md5() 
    250254         
    251         with f=ChunkyFile(tempfile): 
     255        with ChunkyFile(tempfile) as f: 
    252256            for chunk in f.read_chunks(): 
    253257                filehash.update(chunk) 
    254         digest = filehash.hexdigest() 
     258        hash = filehash.hexdigest() 
     259        _logger.debug("Got file with hash %s...", hash) 
     260         
     261        results, numresults = datastore.find({'hash':hash}) 
     262        _logger.debug("%s results in datastore...", str(numresults)) 
    255263         
    256         results, numresults = datastore.find({'hash':digest}) 
    257264        if numresults: 
    258265            _logger.debug("File %s already in datastore (id %s)...", tempfile, 
    259                           results[0]['uid']) 
    260             self._jobject = datastore.get(results[0]['uid']) 
    261             os.rm(tempfile) 
     266                          results[0].object_id) 
     267            self._jobject = results[0] 
     268            os.remove(tempfile) 
    262269            return self._jobject.file_path 
    263270             
    264271        else: 
    265272            _logger.debug("Moving file %s to datastore...", tempfile) 
    266273            self._jobject.file_path = tempfile 
    267             self._jobject.metadata['hash'] = filehash.hexdigest() 
     274            self._jobject.metadata['hash'] = hash 
    268275            datastore.write(self._jobject, transfer_ownership=True) 
    269             return tempfile 
     276            self._jobject = self._revalidate_jobject(self._jobject) 
     277            _logger.debug("File path in datastore is %s", self._jobject.file_path) 
     278            return self._jobject.file_path 
    270279 
    271280    def _download_result_cb(self, getter, tempfile, suggested_name, tube_id): 
    272281        del self.unused_download_tubes