From 2c5dcb163c3f4704d27637d7b440efa1f8f0888d Mon Sep 17 00:00:00 2001
From: Jameson Quinn <jameson.quinn@gmail.com>
Date: Thu, 14 Aug 2008 13:38:25 -0600
Subject: [PATCH] bug #6729: hash and check for dupes before saving downloaded pdfs
---
readactivity.py | 27 ++++++++++++++++++---------
1 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/readactivity.py b/readactivity.py
index 38f3266..67fe1f6 100644
| a | b | |
| 15 | 15 | # along with this program; if not, write to the Free Software |
| 16 | 16 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| 17 | 17 | |
| | 18 | from __future__ import with_statement |
| 18 | 19 | import logging |
| 19 | 20 | import os |
| 20 | 21 | import tempfile |
| … | … | |
| 244 | 245 | |
| 245 | 246 | self.metadata['Read_search'] = self._edit_toolbar._search_entry.props.text |
| 246 | 247 | |
| | 248 | def _revalidate_jobject(self, jobject): |
| | 249 | return datastore.get(jobject.object_id) |
| | 250 | |
| 247 | 251 | def _ensure_in_datastore(self, tempfile): |
| 248 | | import hashlib,json |
| | 252 | import hashlib |
| 249 | 253 | filehash = hashlib.md5() |
| 250 | 254 | |
| 251 | | with f=ChunkyFile(tempfile): |
| | 255 | with ChunkyFile(tempfile) as f: |
| 252 | 256 | for chunk in f.read_chunks(): |
| 253 | 257 | filehash.update(chunk) |
| 254 | | digest = filehash.hexdigest() |
| | 258 | hash = filehash.hexdigest() |
| | 259 | _logger.debug("Got file with hash %s...", hash) |
| | 260 | |
| | 261 | results, numresults = datastore.find({'hash':hash}) |
| | 262 | _logger.debug("%s results in datastore...", str(numresults)) |
| 255 | 263 | |
| 256 | | results, numresults = datastore.find({'hash':digest}) |
| 257 | 264 | if numresults: |
| 258 | 265 | _logger.debug("File %s already in datastore (id %s)...", tempfile, |
| 259 | | results[0]['uid']) |
| 260 | | self._jobject = datastore.get(results[0]['uid']) |
| 261 | | os.rm(tempfile) |
| | 266 | results[0].object_id) |
| | 267 | self._jobject = results[0] |
| | 268 | os.remove(tempfile) |
| 262 | 269 | return self._jobject.file_path |
| 263 | 270 | |
| 264 | 271 | else: |
| 265 | 272 | _logger.debug("Moving file %s to datastore...", tempfile) |
| 266 | 273 | self._jobject.file_path = tempfile |
| 267 | | self._jobject.metadata['hash'] = filehash.hexdigest() |
| | 274 | self._jobject.metadata['hash'] = hash |
| 268 | 275 | datastore.write(self._jobject, transfer_ownership=True) |
| 269 | | return tempfile |
| | 276 | self._jobject = self._revalidate_jobject(self._jobject) |
| | 277 | _logger.debug("File path in datastore is %s", self._jobject.file_path) |
| | 278 | return self._jobject.file_path |
| 270 | 279 | |
| 271 | 280 | def _download_result_cb(self, getter, tempfile, suggested_name, tube_id): |
| 272 | 281 | del self.unused_download_tubes |