summaryrefslogtreecommitdiffstats
path: root/DeDRM_calibre_plugin
diff options
context:
space:
mode:
authorApprentice Alf <[email protected]>2013-10-02 19:59:40 +0100
committerApprentice Alf <[email protected]>2015-03-07 21:10:52 +0000
commitb1feca321df4804b0f178ce96c476a70b9016113 (patch)
treeae61124973b2a10e57ff85aa0c3e610ee007c78b /DeDRM_calibre_plugin
parent74a4c894cb24cb2eeadbe2bf8afcba522e430bb3 (diff)
tools v6.0.8
Diffstat (limited to 'DeDRM_calibre_plugin')
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin.zipbin334161 -> 336887 bytes
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/__init__.py9
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/android.py157
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/android_readme.txt6
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/dialogs.py45
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/encodebase64.py227
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/epubtest.py659
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/erdr2pml.py1378
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/flatxml2html.py928
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/flatxml2svg.py856
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/genbook.py1129
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/ignobleepub.py340
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/ignoblekeygen.py508
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/ineptepub.py2097
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/ineptpdf.py2466
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/k4mobidedrm.py541
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/kgenpids.py2039
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/kindlekey.py1938
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/kindlepid.pybin87160 -> 4738 bytes
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto.dylibbin23859 -> 87160 bytes
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto32.sobin33417 -> 23859 bytes
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto64.sobin21895 -> 33417 bytes
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/mobidedrm.py620
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/openssl_des.py89
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/plugin-import-name-dedrm.txt292
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/prefs.py292
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/stylexml2css.py7
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/subasyncio.py148
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/topazextract.py680
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/utilities.py561
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/wineutils.py60
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/zipfilerugged.py1437
-rw-r--r--DeDRM_calibre_plugin/DeDRM_plugin/zipfix.py218
33 files changed, 10956 insertions, 8771 deletions
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin.zip b/DeDRM_calibre_plugin/DeDRM_plugin.zip
index 7c4878a..58d8174 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin.zip
+++ b/DeDRM_calibre_plugin/DeDRM_plugin.zip
Binary files differ
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/__init__.py b/DeDRM_calibre_plugin/DeDRM_plugin/__init__.py
index caed6e8..37d4cb1 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/__init__.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/__init__.py
@@ -31,14 +31,17 @@ __docformat__ = 'restructuredtext en'
# 6.0.3 - Fixes for Kindle for Mac and Windows non-ascii user names
# 6.0.4 - Fixes for stand-alone scripts and applications
# and pdb files in plugin and initial conversion of prefs.
+# 6.0.5 - Fix a key issue
# 6.0.6 - Fix up an incorrect function call
+# 6.0.7 - Error handling for incomplete PDF metadata
+# 6.0.8 - Fixes a Wine key issue and topaz support
"""
Decrypt DRMed ebooks.
"""
PLUGIN_NAME = u"DeDRM"
-PLUGIN_VERSION_TUPLE = (6, 0, 7)
+PLUGIN_VERSION_TUPLE = (6, 0, 8)
PLUGIN_VERSION = u".".join([unicode(str(x)) for x in PLUGIN_VERSION_TUPLE])
# Include an html helpfile in the plugin's zipfile with the following name.
RESOURCE_NAME = PLUGIN_NAME + '_Help.htm'
@@ -313,7 +316,7 @@ class DeDRM(FileTypePlugin):
from wineutils import WineGetKeys
scriptpath = os.path.join(self.alfdir,u"adobekey.py")
- defaultkeys = self.WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix'])
+ defaultkeys = WineGetKeys(scriptpath, u".der",dedrmprefs['adobewineprefix'])
except:
pass
@@ -391,7 +394,7 @@ class DeDRM(FileTypePlugin):
from wineutils import WineGetKeys
scriptpath = os.path.join(self.alfdir,u"kindlekey.py")
- defaultkeys = self.WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix'])
+ defaultkeys = WineGetKeys(scriptpath, u".k4i",dedrmprefs['kindlewineprefix'])
except:
pass
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/android.py b/DeDRM_calibre_plugin/DeDRM_plugin/android.py
new file mode 100644
index 0000000..ddb94f5
--- /dev/null
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/android.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+#fileencoding: utf-8
+
+import os
+import sys
+import zlib
+import tarfile
+from hashlib import md5
+from cStringIO import StringIO
+from binascii import a2b_hex, b2a_hex
+
+STORAGE = 'AmazonSecureStorage.xml'
+
+class AndroidObfuscation(object):
+ '''AndroidObfuscation
+ For the key, it's written in java, and run in android dalvikvm
+ '''
+
+ key = a2b_hex('0176e04c9408b1702d90be333fd53523')
+
+ def encrypt(self, plaintext):
+ cipher = self._get_cipher()
+ padding = len(self.key) - len(plaintext) % len(self.key)
+ plaintext += chr(padding) * padding
+ return b2a_hex(cipher.encrypt(plaintext))
+
+ def decrypt(self, ciphertext):
+ cipher = self._get_cipher()
+ plaintext = cipher.decrypt(a2b_hex(ciphertext))
+ return plaintext[:-ord(plaintext[-1])]
+
+ def _get_cipher(self):
+ try:
+ from Crypto.Cipher import AES
+ return AES.new(self.key)
+ except ImportError:
+ from aescbc import AES, noPadding
+ return AES(self.key, padding=noPadding())
+
+class AndroidObfuscationV2(AndroidObfuscation):
+ '''AndroidObfuscationV2
+ '''
+
+ count = 503
+ password = 'Thomsun was here!'
+
+ def __init__(self, salt):
+ key = self.password + salt
+ for _ in range(self.count):
+ key = md5(key).digest()
+ self.key = key[:8]
+ self.iv = key[8:16]
+
+ def _get_cipher(self):
+ try :
+ from Crypto.Cipher import DES
+ return DES.new(self.key, DES.MODE_CBC, self.iv)
+ except ImportError:
+ from python_des import Des, CBC
+ return Des(self.key, CBC, self.iv)
+
+def parse_preference(path):
+ ''' parse android's shared preference xml '''
+ storage = {}
+ read = open(path)
+ for line in read:
+ line = line.strip()
+ # <string name="key">value</string>
+ if line.startswith('<string name="'):
+ index = line.find('"', 14)
+ key = line[14:index]
+ value = line[index+2:-9]
+ storage[key] = value
+ read.close()
+ return storage
+
+def get_serials(path=None):
+ ''' get serials from android's shared preference xml '''
+ if path is None:
+ if not os.path.isfile(STORAGE):
+ if os.path.isfile("backup.ab"):
+ get_storage()
+ else:
+ return []
+ path = STORAGE
+
+ if not os.path.isfile(path):
+ return []
+
+ storage = parse_preference(path)
+ salt = storage.get('AmazonSaltKey')
+ if salt and len(salt) == 16:
+ sys.stdout.write('Using AndroidObfuscationV2\n')
+ obfuscation = AndroidObfuscationV2(a2b_hex(salt))
+ else:
+ sys.stdout.write('Using AndroidObfuscation\n')
+ obfuscation = AndroidObfuscation()
+
+ def get_value(key):
+ encrypted_key = obfuscation.encrypt(key)
+ encrypted_value = storage.get(encrypted_key)
+ if encrypted_value:
+ return obfuscation.decrypt(encrypted_value)
+ return ''
+
+ # also see getK4Pids in kgenpids.py
+ try:
+ dsnid = get_value('DsnId')
+ except:
+ sys.stderr.write('cannot get DsnId\n')
+ return []
+
+ try:
+ tokens = set(get_value('kindle.account.tokens').split(','))
+ except:
+ return [dsnid]
+
+ serials = []
+ for token in tokens:
+ if token:
+ serials.append('%s%s' % (dsnid, token))
+ serials.append(dsnid)
+ for token in tokens:
+ if token:
+ serials.append(token)
+ return serials
+
+def get_storage(path='backup.ab'):
+ '''get AmazonSecureStorage.xml from android backup.ab
+ backup.ab can be get using adb command:
+ shell> adb backup com.amazon.kindle
+ '''
+ output = None
+ read = open(path, 'rb')
+ head = read.read(24)
+ if head == 'ANDROID BACKUP\n1\n1\nnone\n':
+ output = StringIO(zlib.decompress(read.read()))
+ read.close()
+
+ if not output:
+ return False
+
+ tar = tarfile.open(fileobj=output)
+ for member in tar.getmembers():
+ if member.name.strip().endswith(STORAGE):
+ write = open(STORAGE, 'w')
+ write.write(tar.extractfile(member).read())
+ write.close()
+ break
+
+ return True
+
+__all__ = [ 'get_storage', 'get_serials', 'parse_preference',
+ 'AndroidObfuscation', 'AndroidObfuscationV2', 'STORAGE']
+
+if __name__ == '__main__':
+ print get_serials() \ No newline at end of file
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/android_readme.txt b/DeDRM_calibre_plugin/DeDRM_plugin/android_readme.txt
new file mode 100644
index 0000000..9e7d035
--- /dev/null
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/android_readme.txt
@@ -0,0 +1,6 @@
+1.1 get AmazonSecureStorage.xml from /data/data/com.amazon.kindle/shared_prefs/AmazonSecureStorage.xml
+
+1.2 on android 4.0+, run `adb backup com.amazon.kindle` from PC will get backup.ab
+ now android.py can convert backup.ab to AmazonSecureStorage.xml
+
+2. run `k4mobidedrm.py -a AmazonSecureStorage.xml <infile> <outdir>'
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/dialogs.py b/DeDRM_calibre_plugin/DeDRM_plugin/dialogs.py
deleted file mode 100644
index 6bb8c37..0000000
--- a/DeDRM_calibre_plugin/DeDRM_plugin/dialogs.py
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-# base64.py, version 1.0
-# Copyright © 2010 Apprentice Alf
-
-# Released under the terms of the GNU General Public Licence, version 3 or
-# later. <http://www.gnu.org/licenses/>
-
-# Revision history:
-# 1 - Initial release. To allow Applescript to do base64 encoding
-
-"""
-Provide base64 encoding.
-"""
-
-from __future__ import with_statement
-
-__license__ = 'GPL v3'
-
-import sys
-import os
-import base64
-
-def usage(progname):
- print "Applies base64 encoding to the supplied file, sending to standard output"
- print "Usage:"
- print " %s <infile>" % progname
-
-def cli_main(argv=sys.argv):
- progname = os.path.basename(argv[0])
-
- if len(argv)<2:
- usage(progname)
- sys.exit(2)
-
- keypath = argv[1]
- with open(keypath, 'rb') as f:
- keyder = f.read()
- print keyder.encode('base64')
- return 0
-
-
-if __name__ == '__main__':
- sys.exit(cli_main())
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/encodebase64.py b/DeDRM_calibre_plugin/DeDRM_plugin/encodebase64.py
index 11f1427..6bb8c37 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/encodebase64.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/encodebase64.py
@@ -1,208 +1,45 @@
-#!/usr/bin/python
-#
-# This is a python script. You need a Python interpreter to run it.
-# For example, ActiveState Python, which exists for windows.
-#
-# Changelog drmcheck
-# 1.00 - Initial version, with code from various other scripts
-# 1.01 - Moved authorship announcement to usage section.
-#
-# Changelog epubtest
-# 1.00 - Cut to epubtest.py, testing ePub files only by Apprentice Alf
-# 1.01 - Added routine for use by Windows DeDRM
-#
-# Written in 2011 by Paul Durrant
-# Released with unlicense. See http://unlicense.org/
-#
-#############################################################################
-#
-# This is free and unencumbered software released into the public domain.
-#
-# Anyone is free to copy, modify, publish, use, compile, sell, or
-# distribute this software, either in source code form or as a compiled
-# binary, for any purpose, commercial or non-commercial, and by any
-# means.
-#
-# In jurisdictions that recognize copyright laws, the author or authors
-# of this software dedicate any and all copyright interest in the
-# software to the public domain. We make this dedication for the benefit
-# of the public at large and to the detriment of our heirs and
-# successors. We intend this dedication to be an overt act of
-# relinquishment in perpetuity of all present and future rights to this
-# software under copyright law.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-#############################################################################
-#
-# It's still polite to give attribution if you do reuse this code.
-#
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
-from __future__ import with_statement
-
-__version__ = '1.01'
-
-import sys, struct, os
-import zlib
-import zipfile
-import xml.etree.ElementTree as etree
-
-NSMAP = {'adept': 'http://ns.adobe.com/adept',
- 'enc': 'http://www.w3.org/2001/04/xmlenc#'}
-
-# Wrap a stream so that output gets flushed immediately
-# and also make sure that any unicode strings get
-# encoded using "replace" before writing them.
-class SafeUnbuffered:
- def __init__(self, stream):
- self.stream = stream
- self.encoding = stream.encoding
- if self.encoding == None:
- self.encoding = "utf-8"
- def write(self, data):
- if isinstance(data,unicode):
- data = data.encode(self.encoding,"replace")
- self.stream.write(data)
- self.stream.flush()
- def __getattr__(self, attr):
- return getattr(self.stream, attr)
-
-try:
- from calibre.constants import iswindows, isosx
-except:
- iswindows = sys.platform.startswith('win')
- isosx = sys.platform.startswith('darwin')
-
-def unicode_argv():
- if iswindows:
- # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
- # strings.
-
- # Versions 2.x of Python don't support Unicode in sys.argv on
- # Windows, with the underlying Windows API instead replacing multi-byte
- # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv
- # as a list of Unicode strings and encode them as utf-8
+# base64.py, version 1.0
+# Copyright © 2010 Apprentice Alf
- from ctypes import POINTER, byref, cdll, c_int, windll
- from ctypes.wintypes import LPCWSTR, LPWSTR
+# Released under the terms of the GNU General Public Licence, version 3 or
+# later. <http://www.gnu.org/licenses/>
- GetCommandLineW = cdll.kernel32.GetCommandLineW
- GetCommandLineW.argtypes = []
- GetCommandLineW.restype = LPCWSTR
+# Revision history:
+# 1 - Initial release. To allow Applescript to do base64 encoding
- CommandLineToArgvW = windll.shell32.CommandLineToArgvW
- CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
- CommandLineToArgvW.restype = POINTER(LPWSTR)
+"""
+Provide base64 encoding.
+"""
- cmd = GetCommandLineW()
- argc = c_int(0)
- argv = CommandLineToArgvW(cmd, byref(argc))
- if argc.value > 0:
- # Remove Python executable and commands if present
- start = argc.value - len(sys.argv)
- return [argv[i] for i in
- xrange(start, argc.value)]
- # if we don't have any arguments at all, just pass back script name
- # this should never happen
- return [u"epubtest.py"]
- else:
- argvencoding = sys.stdin.encoding
- if argvencoding == None:
- argvencoding = "utf-8"
- return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
-
-_FILENAME_LEN_OFFSET = 26
-_EXTRA_LEN_OFFSET = 28
-_FILENAME_OFFSET = 30
-_MAX_SIZE = 64 * 1024
-
-
-def uncompress(cmpdata):
- dc = zlib.decompressobj(-15)
- data = ''
- while len(cmpdata) > 0:
- if len(cmpdata) > _MAX_SIZE :
- newdata = cmpdata[0:_MAX_SIZE]
- cmpdata = cmpdata[_MAX_SIZE:]
- else:
- newdata = cmpdata
- cmpdata = ''
- newdata = dc.decompress(newdata)
- unprocessed = dc.unconsumed_tail
- if len(unprocessed) == 0:
- newdata += dc.flush()
- data += newdata
- cmpdata += unprocessed
- unprocessed = ''
- return data
-
-def getfiledata(file, zi):
- # get file name length and exta data length to find start of file data
- local_header_offset = zi.header_offset
-
- file.seek(local_header_offset + _FILENAME_LEN_OFFSET)
- leninfo = file.read(2)
- local_name_length, = struct.unpack('<H', leninfo)
-
- file.seek(local_header_offset + _EXTRA_LEN_OFFSET)
- exinfo = file.read(2)
- extra_field_length, = struct.unpack('<H', exinfo)
+from __future__ import with_statement
- file.seek(local_header_offset + _FILENAME_OFFSET + local_name_length + extra_field_length)
- data = None
+__license__ = 'GPL v3'
- # if not compressed we are good to go
- if zi.compress_type == zipfile.ZIP_STORED:
- data = file.read(zi.file_size)
+import sys
+import os
+import base64
- # if compressed we must decompress it using zlib
- if zi.compress_type == zipfile.ZIP_DEFLATED:
- cmpdata = file.read(zi.compress_size)
- data = uncompress(cmpdata)
+def usage(progname):
+ print "Applies base64 encoding to the supplied file, sending to standard output"
+ print "Usage:"
+ print " %s <infile>" % progname
- return data
+def cli_main(argv=sys.argv):
+ progname = os.path.basename(argv[0])
-def encryption(infile):
- # returns encryption: one of Unencrypted, Adobe, B&N and Unknown
- encryption = "Unknown"
- try:
- with open(infile,'rb') as infileobject:
- bookdata = infileobject.read(58)
- # Check for Zip
- if bookdata[0:0+2] == "PK":
- foundrights = False
- foundencryption = False
- inzip = zipfile.ZipFile(infile,'r')
- namelist = set(inzip.namelist())
- if 'META-INF/rights.xml' not in namelist or 'META-INF/encryption.xml' not in namelist:
- encryption = "Unencrypted"
- else:
- rights = etree.fromstring(inzip.read('META-INF/rights.xml'))
- adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
- expr = './/%s' % (adept('encryptedKey'),)
- bookkey = ''.join(rights.findtext(expr))
- if len(bookkey) == 172:
- encryption = "Adobe"
- elif len(bookkey) == 64:
- encryption = "B&N"
- else:
- encryption = "Unknown"
- except:
- traceback.print_exc()
- return encryption
+ if len(argv)<2:
+ usage(progname)
+ sys.exit(2)
-def main():
- argv=unicode_argv()
- print encryption(argv[1])
+ keypath = argv[1]
+ with open(keypath, 'rb') as f:
+ keyder = f.read()
+ print keyder.encode('base64')
return 0
-if __name__ == "__main__":
- sys.stdout=SafeUnbuffered(sys.stdout)
- sys.stderr=SafeUnbuffered(sys.stderr)
- sys.exit(main())
+
+if __name__ == '__main__':
+ sys.exit(cli_main())
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/epubtest.py b/DeDRM_calibre_plugin/DeDRM_plugin/epubtest.py
index 1dfef42..11f1427 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/epubtest.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/epubtest.py
@@ -1,82 +1,60 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-# erdr2pml.py
-# Copyright © 2008 The Dark Reverser
+#!/usr/bin/python
#
-# Modified 2008–2012 by some_updates, DiapDealer and Apprentice Alf
-
# This is a python script. You need a Python interpreter to run it.
# For example, ActiveState Python, which exists for windows.
-# Changelog
#
-# Based on ereader2html version 0.08 plus some later small fixes
+# Changelog drmcheck
+# 1.00 - Initial version, with code from various other scripts
+# 1.01 - Moved authorship announcement to usage section.
+#
+# Changelog epubtest
+# 1.00 - Cut to epubtest.py, testing ePub files only by Apprentice Alf
+# 1.01 - Added routine for use by Windows DeDRM
+#
+# Written in 2011 by Paul Durrant
+# Released with unlicense. See http://unlicense.org/
+#
+#############################################################################
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#############################################################################
+#
+# It's still polite to give attribution if you do reuse this code.
#
-# 0.01 - Initial version
-# 0.02 - Support more eReader files. Support bold text and links. Fix PML decoder parsing bug.
-# 0.03 - Fix incorrect variable usage at one place.
-# 0.03b - enhancement by DeBockle (version 259 support)
-# Custom version 0.03 - no change to eReader support, only usability changes
-# - start of pep-8 indentation (spaces not tab), fix trailing blanks
-# - version variable, only one place to change
-# - added main routine, now callable as a library/module,
-# means tools can add optional support for ereader2html
-# - outdir is no longer a mandatory parameter (defaults based on input name if missing)
-# - time taken output to stdout
-# - Psyco support - reduces runtime by a factor of (over) 3!
-# E.g. (~600Kb file) 90 secs down to 24 secs
-# - newstyle classes
-# - changed map call to list comprehension
-# may not work with python 2.3
-# without Psyco this reduces runtime to 90%
-# E.g. 90 secs down to 77 secs
-# Psyco with map calls takes longer, do not run with map in Psyco JIT!
-# - izip calls used instead of zip (if available), further reduction
-# in run time (factor of 4.5).
-# E.g. (~600Kb file) 90 secs down to 20 secs
-# - Python 2.6+ support, avoid DeprecationWarning with sha/sha1
-# 0.04 - Footnote support, PML output, correct charset in html, support more PML tags
-# - Feature change, dump out PML file
-# - Added supprt for footnote tags. NOTE footnote ids appear to be bad (not usable)
-# in some pdb files :-( due to the same id being used multiple times
-# - Added correct charset encoding (pml is based on cp1252)
-# - Added logging support.
-# 0.05 - Improved type 272 support for sidebars, links, chapters, metainfo, etc
-# 0.06 - Merge of 0.04 and 0.05. Improved HTML output
-# Placed images in subfolder, so that it's possible to just
-# drop the book.pml file onto DropBook to make an unencrypted
-# copy of the eReader file.
-# Using that with Calibre works a lot better than the HTML
-# conversion in this code.
-# 0.07 - Further Improved type 272 support for sidebars with all earlier fixes
-# 0.08 - fixed typos, removed extraneous things
-# 0.09 - fixed typos in first_pages to first_page to again support older formats
-# 0.10 - minor cleanups
-# 0.11 - fixups for using correct xml for footnotes and sidebars for use with Dropbook
-# 0.12 - Fix added to prevent lowercasing of image names when the pml code itself uses a different case in the link name.
-# 0.13 - change to unbuffered stdout for use with gui front ends
-# 0.14 - contributed enhancement to support --make-pmlz switch
-# 0.15 - enabled high-ascii to pml character encoding. DropBook now works on Mac.
-# 0.16 - convert to use openssl DES (very very fast) or pure python DES if openssl's libcrypto is not available
-# 0.17 - added support for pycrypto's DES as well
-# 0.18 - on Windows try PyCrypto first and OpenSSL next
-# 0.19 - Modify the interface to allow use of import
-# 0.20 - modify to allow use inside new interface for calibre plugins
-# 0.21 - Support eReader (drm) version 11.
-# - Don't reject dictionary format.
-# - Ignore sidebars for dictionaries (different format?)
-# 0.22 - Unicode and plugin support, different image folders for PMLZ and source
-# 0.23 - moved unicode_argv call inside main for Windows DeDRM compatibility
-__version__='0.23'
+from __future__ import with_statement
-import sys, re
-import struct, binascii, getopt, zlib, os, os.path, urllib, tempfile, traceback
+__version__ = '1.01'
-if 'calibre' in sys.modules:
- inCalibre = True
-else:
- inCalibre = False
+import sys, struct, os
+import zlib
+import zipfile
+import xml.etree.ElementTree as etree
+
+NSMAP = {'adept': 'http://ns.adobe.com/adept',
+ 'enc': 'http://www.w3.org/2001/04/xmlenc#'}
# Wrap a stream so that output gets flushed immediately
# and also make sure that any unicode strings get
@@ -95,8 +73,11 @@ class SafeUnbuffered:
def __getattr__(self, attr):
return getattr(self.stream, attr)
-iswindows = sys.platform.startswith('win')
-isosx = sys.platform.startswith('darwin')
+try:
+ from calibre.constants import iswindows, isosx
+except:
+ iswindows = sys.platform.startswith('win')
+ isosx = sys.platform.startswith('darwin')
def unicode_argv():
if iswindows:
@@ -105,8 +86,8 @@ def unicode_argv():
# Versions 2.x of Python don't support Unicode in sys.argv on
# Windows, with the underlying Windows API instead replacing multi-byte
- # characters with '?'.
-
+ # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv
+ # as a list of Unicode strings and encode them as utf-8
from ctypes import POINTER, byref, cdll, c_int, windll
from ctypes.wintypes import LPCWSTR, LPWSTR
@@ -129,469 +110,99 @@ def unicode_argv():
xrange(start, argc.value)]
# if we don't have any arguments at all, just pass back script name
# this should never happen
- return [u"mobidedrm.py"]
+ return [u"epubtest.py"]
else:
argvencoding = sys.stdin.encoding
if argvencoding == None:
argvencoding = "utf-8"
return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
-Des = None
-if iswindows:
- # first try with pycrypto
- if inCalibre:
- from calibre_plugins.dedrm import pycrypto_des
- else:
- import pycrypto_des
- Des = pycrypto_des.load_pycrypto()
- if Des == None:
- # they try with openssl
- if inCalibre:
- from calibre_plugins.dedrm import openssl_des
- else:
- import openssl_des
- Des = openssl_des.load_libcrypto()
-else:
- # first try with openssl
- if inCalibre:
- from calibre_plugins.dedrm import openssl_des
- else:
- import openssl_des
- Des = openssl_des.load_libcrypto()
- if Des == None:
- # then try with pycrypto
- if inCalibre:
- from calibre_plugins.dedrm import pycrypto_des
- else:
- import pycrypto_des
- Des = pycrypto_des.load_pycrypto()
-
-# if that did not work then use pure python implementation
-# of DES and try to speed it up with Psycho
-if Des == None:
- if inCalibre:
- from calibre_plugins.dedrm import python_des
- else:
- import python_des
- Des = python_des.Des
- # Import Psyco if available
- try:
- # http://psyco.sourceforge.net
- import psyco
- psyco.full()
- except ImportError:
- pass
-
-try:
- from hashlib import sha1
-except ImportError:
- # older Python release
- import sha
- sha1 = lambda s: sha.new(s)
-
-import cgi
-import logging
-
-logging.basicConfig()
-#logging.basicConfig(level=logging.DEBUG)
-
-
-class Sectionizer(object):
- bkType = "Book"
-
- def __init__(self, filename, ident):
- self.contents = file(filename, 'rb').read()
- self.header = self.contents[0:72]
- self.num_sections, = struct.unpack('>H', self.contents[76:78])
- # Dictionary or normal content (TODO: Not hard-coded)
- if self.header[0x3C:0x3C+8] != ident:
- if self.header[0x3C:0x3C+8] == "PDctPPrs":
- self.bkType = "Dict"
- else:
- raise ValueError('Invalid file format')
- self.sections = []
- for i in xrange(self.num_sections):
- offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.contents[78+i*8:78+i*8+8])
- flags, val = a1, a2<<16|a3<<8|a4
- self.sections.append( (offset, flags, val) )
- def loadSection(self, section):
- if section + 1 == self.num_sections:
- end_off = len(self.contents)
- else:
- end_off = self.sections[section + 1][0]
- off = self.sections[section][0]
- return self.contents[off:end_off]
-
-# cleanup unicode filenames
-# borrowed from calibre from calibre/src/calibre/__init__.py
-# added in removal of control (<32) chars
-# and removal of . at start and end
-# and with some (heavily edited) code from Paul Durrant's kindlenamer.py
-def sanitizeFileName(name):
- # substitute filename unfriendly characters
- name = name.replace(u"<",u"[").replace(u">",u"]").replace(u" : ",u" – ").replace(u": ",u" – ").replace(u":",u"—").replace(u"/",u"_").replace(u"\\",u"_").replace(u"|",u"_").replace(u"\"",u"\'")
- # delete control characters
- name = u"".join(char for char in name if ord(char)>=32)
- # white space to single space, delete leading and trailing while space
- name = re.sub(ur"\s", u" ", name).strip()
- # remove leading dots
- while len(name)>0 and name[0] == u".":
- name = name[1:]
- # remove trailing dots (Windows doesn't like them)
- if name.endswith(u'.'):
- name = name[:-1]
- return name
-
-def fixKey(key):
- def fixByte(b):
- return b ^ ((b ^ (b<<1) ^ (b<<2) ^ (b<<3) ^ (b<<4) ^ (b<<5) ^ (b<<6) ^ (b<<7) ^ 0x80) & 0x80)
- return "".join([chr(fixByte(ord(a))) for a in key])
-
-def deXOR(text, sp, table):
- r=''
- j = sp
- for i in xrange(len(text)):
- r += chr(ord(table[j]) ^ ord(text[i]))
- j = j + 1
- if j == len(table):
- j = 0
- return r
-
-class EreaderProcessor(object):
- def __init__(self, sect, user_key):
- self.section_reader = sect.loadSection
- data = self.section_reader(0)
- version, = struct.unpack('>H', data[0:2])
- self.version = version
- logging.info('eReader file format version %s', version)
- if version != 272 and version != 260 and version != 259:
- raise ValueError('incorrect eReader version %d (error 1)' % version)
- data = self.section_reader(1)
- self.data = data
- des = Des(fixKey(data[0:8]))
- cookie_shuf, cookie_size = struct.unpack('>LL', des.decrypt(data[-8:]))
- if cookie_shuf < 3 or cookie_shuf > 0x14 or cookie_size < 0xf0 or cookie_size > 0x200:
- raise ValueError('incorrect eReader version (error 2)')
- input = des.decrypt(data[-cookie_size:])
- def unshuff(data, shuf):
- r = [''] * len(data)
- j = 0
- for i in xrange(len(data)):
- j = (j + shuf) % len(data)
- r[j] = data[i]
- assert len("".join(r)) == len(data)
- return "".join(r)
- r = unshuff(input[0:-8], cookie_shuf)
+_FILENAME_LEN_OFFSET = 26
+_EXTRA_LEN_OFFSET = 28
+_FILENAME_OFFSET = 30
+_MAX_SIZE = 64 * 1024
- drm_sub_version = struct.unpack('>H', r[0:2])[0]
- self.num_text_pages = struct.unpack('>H', r[2:4])[0] - 1
- self.num_image_pages = struct.unpack('>H', r[26:26+2])[0]
- self.first_image_page = struct.unpack('>H', r[24:24+2])[0]
- # Default values
- self.num_footnote_pages = 0
- self.num_sidebar_pages = 0
- self.first_footnote_page = -1
- self.first_sidebar_page = -1
- if self.version == 272:
- self.num_footnote_pages = struct.unpack('>H', r[46:46+2])[0]
- self.first_footnote_page = struct.unpack('>H', r[44:44+2])[0]
- if (sect.bkType == "Book"):
- self.num_sidebar_pages = struct.unpack('>H', r[38:38+2])[0]
- self.first_sidebar_page = struct.unpack('>H', r[36:36+2])[0]
- # self.num_bookinfo_pages = struct.unpack('>H', r[34:34+2])[0]
- # self.first_bookinfo_page = struct.unpack('>H', r[32:32+2])[0]
- # self.num_chapter_pages = struct.unpack('>H', r[22:22+2])[0]
- # self.first_chapter_page = struct.unpack('>H', r[20:20+2])[0]
- # self.num_link_pages = struct.unpack('>H', r[30:30+2])[0]
- # self.first_link_page = struct.unpack('>H', r[28:28+2])[0]
- # self.num_xtextsize_pages = struct.unpack('>H', r[54:54+2])[0]
- # self.first_xtextsize_page = struct.unpack('>H', r[52:52+2])[0]
- # **before** data record 1 was decrypted and unshuffled, it contained data
- # to create an XOR table and which is used to fix footnote record 0, link records, chapter records, etc
- self.xortable_offset = struct.unpack('>H', r[40:40+2])[0]
- self.xortable_size = struct.unpack('>H', r[42:42+2])[0]
- self.xortable = self.data[self.xortable_offset:self.xortable_offset + self.xortable_size]
+def uncompress(cmpdata):
+ dc = zlib.decompressobj(-15)
+ data = ''
+ while len(cmpdata) > 0:
+ if len(cmpdata) > _MAX_SIZE :
+ newdata = cmpdata[0:_MAX_SIZE]
+ cmpdata = cmpdata[_MAX_SIZE:]
else:
- # Nothing needs to be done
- pass
- # self.num_bookinfo_pages = 0
- # self.num_chapter_pages = 0
- # self.num_link_pages = 0
- # self.num_xtextsize_pages = 0
- # self.first_bookinfo_page = -1
- # self.first_chapter_page = -1
- # self.first_link_page = -1
- # self.first_xtextsize_page = -1
-
- logging.debug('self.num_text_pages %d', self.num_text_pages)
- logging.debug('self.num_footnote_pages %d, self.first_footnote_page %d', self.num_footnote_pages , self.first_footnote_page)
- logging.debug('self.num_sidebar_pages %d, self.first_sidebar_page %d', self.num_sidebar_pages , self.first_sidebar_page)
- self.flags = struct.unpack('>L', r[4:8])[0]
- reqd_flags = (1<<9) | (1<<7) | (1<<10)
- if (self.flags & reqd_flags) != reqd_flags:
- print "Flags: 0x%X" % self.flags
- raise ValueError('incompatible eReader file')
- des = Des(fixKey(user_key))
- if version == 259:
- if drm_sub_version != 7:
- raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
- encrypted_key_sha = r[44:44+20]
- encrypted_key = r[64:64+8]
- elif version == 260:
- if drm_sub_version != 13 and drm_sub_version != 11:
- raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
- if drm_sub_version == 13:
- encrypted_key = r[44:44+8]
- encrypted_key_sha = r[52:52+20]
- else:
- encrypted_key = r[64:64+8]
- encrypted_key_sha = r[44:44+20]
- elif version == 272:
- encrypted_key = r[172:172+8]
- encrypted_key_sha = r[56:56+20]
- self.content_key = des.decrypt(encrypted_key)
- if sha1(self.content_key).digest() != encrypted_key_sha:
- raise ValueError('Incorrect Name and/or Credit Card')
-
- def getNumImages(self):
- return self.num_image_pages
-
- def getImage(self, i):
- sect = self.section_reader(self.first_image_page + i)
- name = sect[4:4+32].strip('\0')
- data = sect[62:]
- return sanitizeFileName(unicode(name,'windows-1252')), data
-
-
- # def getChapterNamePMLOffsetData(self):
- # cv = ''
- # if self.num_chapter_pages > 0:
- # for i in xrange(self.num_chapter_pages):
- # chaps = self.section_reader(self.first_chapter_page + i)
- # j = i % self.xortable_size
- # offname = deXOR(chaps, j, self.xortable)
- # offset = struct.unpack('>L', offname[0:4])[0]
- # name = offname[4:].strip('\0')
- # cv += '%d|%s\n' % (offset, name)
- # return cv
-
- # def getLinkNamePMLOffsetData(self):
- # lv = ''
- # if self.num_link_pages > 0:
- # for i in xrange(self.num_link_pages):
- # links = self.section_reader(self.first_link_page + i)
- # j = i % self.xortable_size
- # offname = deXOR(links, j, self.xortable)
- # offset = struct.unpack('>L', offname[0:4])[0]
- # name = offname[4:].strip('\0')
- # lv += '%d|%s\n' % (offset, name)
- # return lv
-
- # def getExpandedTextSizesData(self):
- # ts = ''
- # if self.num_xtextsize_pages > 0:
- # tsize = deXOR(self.section_reader(self.first_xtextsize_page), 0, self.xortable)
- # for i in xrange(self.num_text_pages):
- # xsize = struct.unpack('>H', tsize[0:2])[0]
- # ts += "%d\n" % xsize
- # tsize = tsize[2:]
- # return ts
-
- # def getBookInfo(self):
- # bkinfo = ''
- # if self.num_bookinfo_pages > 0:
- # info = self.section_reader(self.first_bookinfo_page)
- # bkinfo = deXOR(info, 0, self.xortable)
- # bkinfo = bkinfo.replace('\0','|')
- # bkinfo += '\n'
- # return bkinfo
-
- def getText(self):
- des = Des(fixKey(self.content_key))
- r = ''
- for i in xrange(self.num_text_pages):
- logging.debug('get page %d', i)
- r += zlib.decompress(des.decrypt(self.section_reader(1 + i)))
-
- # now handle footnotes pages
- if self.num_footnote_pages > 0:
- r += '\n'
- # the record 0 of the footnote section must pass through the Xor Table to make it useful
- sect = self.section_reader(self.first_footnote_page)
- fnote_ids = deXOR(sect, 0, self.xortable)
- # the remaining records of the footnote sections need to be decoded with the content_key and zlib inflated
- des = Des(fixKey(self.content_key))
- for i in xrange(1,self.num_footnote_pages):
- logging.debug('get footnotepage %d', i)
- id_len = ord(fnote_ids[2])
- id = fnote_ids[3:3+id_len]
- fmarker = '<footnote id="%s">\n' % id
- fmarker += zlib.decompress(des.decrypt(self.section_reader(self.first_footnote_page + i)))
- fmarker += '\n</footnote>\n'
- r += fmarker
- fnote_ids = fnote_ids[id_len+4:]
-
- # TODO: Handle dictionary index (?) pages - which are also marked as
- # sidebar_pages (?). For now dictionary sidebars are ignored
- # For dictionaries - record 0 is null terminated strings, followed by
- # blocks of around 62000 bytes and a final block. Not sure of the
- # encoding
-
- # now handle sidebar pages
- if self.num_sidebar_pages > 0:
- r += '\n'
- # the record 0 of the sidebar section must pass through the Xor Table to make it useful
- sect = self.section_reader(self.first_sidebar_page)
- sbar_ids = deXOR(sect, 0, self.xortable)
- # the remaining records of the sidebar sections need to be decoded with the content_key and zlib inflated
- des = Des(fixKey(self.content_key))
- for i in xrange(1,self.num_sidebar_pages):
- id_len = ord(sbar_ids[2])
- id = sbar_ids[3:3+id_len]
- smarker = '<sidebar id="%s">\n' % id
- smarker += zlib.decompress(des.decrypt(self.section_reader(self.first_sidebar_page + i)))
- smarker += '\n</sidebar>\n'
- r += smarker
- sbar_ids = sbar_ids[id_len+4:]
-
- return r
-
-def cleanPML(pml):
- # Convert special characters to proper PML code. High ASCII start at (\x80, \a128) and go up to (\xff, \a255)
- pml2 = pml
- for k in xrange(128,256):
- badChar = chr(k)
- pml2 = pml2.replace(badChar, '\\a%03d' % k)
- return pml2
-
-def decryptBook(infile, outpath, make_pmlz, user_key):
- bookname = os.path.splitext(os.path.basename(infile))[0]
- if make_pmlz:
- # outpath is actually pmlz name
- pmlzname = outpath
- outdir = tempfile.mkdtemp()
- imagedirpath = os.path.join(outdir,u"images")
- else:
- pmlzname = None
- outdir = outpath
- imagedirpath = os.path.join(outdir,bookname + u"_img")
-
+ newdata = cmpdata
+ cmpdata = ''
+ newdata = dc.decompress(newdata)
+ unprocessed = dc.unconsumed_tail
+ if len(unprocessed) == 0:
+ newdata += dc.flush()
+ data += newdata
+ cmpdata += unprocessed
+ unprocessed = ''
+ return data
+
+def getfiledata(file, zi):
+ # get file name length and exta data length to find start of file data
+ local_header_offset = zi.header_offset
+
+ file.seek(local_header_offset + _FILENAME_LEN_OFFSET)
+ leninfo = file.read(2)
+ local_name_length, = struct.unpack('<H', leninfo)
+
+ file.seek(local_header_offset + _EXTRA_LEN_OFFSET)
+ exinfo = file.read(2)
+ extra_field_length, = struct.unpack('<H', exinfo)
+
+ file.seek(local_header_offset + _FILENAME_OFFSET + local_name_length + extra_field_length)
+ data = None
+
+ # if not compressed we are good to go
+ if zi.compress_type == zipfile.ZIP_STORED:
+ data = file.read(zi.file_size)
+
+ # if compressed we must decompress it using zlib
+ if zi.compress_type == zipfile.ZIP_DEFLATED:
+ cmpdata = file.read(zi.compress_size)
+ data = uncompress(cmpdata)
+
+ return data
+
+def encryption(infile):
+ # returns encryption: one of Unencrypted, Adobe, B&N and Unknown
+ encryption = "Unknown"
try:
- if not os.path.exists(outdir):
- os.makedirs(outdir)
- print u"Decoding File"
- sect = Sectionizer(infile, 'PNRdPPrs')
- er = EreaderProcessor(sect, user_key)
-
- if er.getNumImages() > 0:
- print u"Extracting images"
- if not os.path.exists(imagedirpath):
- os.makedirs(imagedirpath)
- for i in xrange(er.getNumImages()):
- name, contents = er.getImage(i)
- file(os.path.join(imagedirpath, name), 'wb').write(contents)
-
- print u"Extracting pml"
- pml_string = er.getText()
- pmlfilename = bookname + ".pml"
- file(os.path.join(outdir, pmlfilename),'wb').write(cleanPML(pml_string))
- if pmlzname is not None:
- import zipfile
- import shutil
- print u"Creating PMLZ file {0}".format(os.path.basename(pmlzname))
- myZipFile = zipfile.ZipFile(pmlzname,'w',zipfile.ZIP_STORED, False)
- list = os.listdir(outdir)
- for filename in list:
- localname = filename
- filePath = os.path.join(outdir,filename)
- if os.path.isfile(filePath):
- myZipFile.write(filePath, localname)
- elif os.path.isdir(filePath):
- imageList = os.listdir(filePath)
- localimgdir = os.path.basename(filePath)
- for image in imageList:
- localname = os.path.join(localimgdir,image)
- imagePath = os.path.join(filePath,image)
- if os.path.isfile(imagePath):
- myZipFile.write(imagePath, localname)
- myZipFile.close()
- # remove temporary directory
- shutil.rmtree(outdir, True)
- print u"Output is {0}".format(pmlzname)
- else :
- print u"Output is in {0}".format(outdir)
- print "done"
- except ValueError, e:
- print u"Error: {0}".format(e)
+ with open(infile,'rb') as infileobject:
+ bookdata = infileobject.read(58)
+ # Check for Zip
+ if bookdata[0:0+2] == "PK":
+ foundrights = False
+ foundencryption = False
+ inzip = zipfile.ZipFile(infile,'r')
+ namelist = set(inzip.namelist())
+ if 'META-INF/rights.xml' not in namelist or 'META-INF/encryption.xml' not in namelist:
+ encryption = "Unencrypted"
+ else:
+ rights = etree.fromstring(inzip.read('META-INF/rights.xml'))
+ adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
+ expr = './/%s' % (adept('encryptedKey'),)
+ bookkey = ''.join(rights.findtext(expr))
+ if len(bookkey) == 172:
+ encryption = "Adobe"
+ elif len(bookkey) == 64:
+ encryption = "B&N"
+ else:
+ encryption = "Unknown"
+ except:
traceback.print_exc()
- return 1
- return 0
-
-
-def usage():
- print u"Converts DRMed eReader books to PML Source"
- print u"Usage:"
- print u" erdr2pml [options] infile.pdb [outpath] \"your name\" credit_card_number"
- print u" "
- print u"Options: "
- print u" -h prints this message"
- print u" -p create PMLZ instead of source folder"
- print u" --make-pmlz create PMLZ instead of source folder"
- print u" "
- print u"Note:"
- print u" if outpath is ommitted, creates source in 'infile_Source' folder"
- print u" if outpath is ommitted and pmlz option, creates PMLZ 'infile.pmlz'"
- print u" if source folder created, images are in infile_img folder"
- print u" if pmlz file created, images are in images folder"
- print u" It's enough to enter the last 8 digits of the credit card number"
- return
-
-def getuser_key(name,cc):
- newname = "".join(c for c in name.lower() if c >= 'a' and c <= 'z' or c >= '0' and c <= '9')
- cc = cc.replace(" ","")
- return struct.pack('>LL', binascii.crc32(newname) & 0xffffffff,binascii.crc32(cc[-8:])& 0xffffffff)
-
-def cli_main():
- print u"eRdr2Pml v{0}. Copyright © 2009–2012 The Dark Reverser et al.".format(__version__)
+ return encryption
+def main():
argv=unicode_argv()
- try:
- opts, args = getopt.getopt(argv[1:], "hp", ["make-pmlz"])
- except getopt.GetoptError, err:
- print err.args[0]
- usage()
- return 1
- make_pmlz = False
- for o, a in opts:
- if o == "-h":
- usage()
- return 0
- elif o == "-p":
- make_pmlz = True
- elif o == "--make-pmlz":
- make_pmlz = True
-
- if len(args)!=3 and len(args)!=4:
- usage()
- return 1
-
- if len(args)==3:
- infile, name, cc = args
- if make_pmlz:
- outpath = os.path.splitext(infile)[0] + u".pmlz"
- else:
- outpath = os.path.splitext(infile)[0] + u"_Source"
- elif len(args)==4:
- infile, outpath, name, cc = args
-
- print getuser_key(name,cc).encode('hex')
-
- return decryptBook(infile, outpath, make_pmlz, getuser_key(name,cc))
-
+ print encryption(argv[1])
+ return 0
if __name__ == "__main__":
sys.stdout=SafeUnbuffered(sys.stdout)
sys.stderr=SafeUnbuffered(sys.stderr)
- sys.exit(cli_main())
-
+ sys.exit(main())
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/erdr2pml.py b/DeDRM_calibre_plugin/DeDRM_plugin/erdr2pml.py
index 4d83368..1dfef42 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/erdr2pml.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/erdr2pml.py
@@ -1,797 +1,597 @@
-#! /usr/bin/python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-# For use with Topaz Scripts Version 2.6
-
-import sys
-import csv
-import os
-import math
-import getopt
-from struct import pack
-from struct import unpack
-
-
-class DocParser(object):
- def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage):
- self.id = os.path.basename(fileid).replace('.dat','')
- self.svgcount = 0
- self.docList = flatxml.split('\n')
- self.docSize = len(self.docList)
- self.classList = {}
- self.bookDir = bookDir
- self.gdict = gdict
- tmpList = classlst.split('\n')
- for pclass in tmpList:
- if pclass != '':
- # remove the leading period from the css name
- cname = pclass[1:]
- self.classList[cname] = True
- self.fixedimage = fixedimage
- self.ocrtext = []
- self.link_id = []
- self.link_title = []
- self.link_page = []
- self.link_href = []
- self.link_type = []
- self.dehyphen_rootid = []
- self.paracont_stemid = []
- self.parastems_stemid = []
-
-
- def getGlyph(self, gid):
- result = ''
- id='id="gl%d"' % gid
- return self.gdict.lookup(id)
-
- def glyphs_to_image(self, glyphList):
-
- def extract(path, key):
- b = path.find(key) + len(key)
- e = path.find(' ',b)
- return int(path[b:e])
-
- svgDir = os.path.join(self.bookDir,'svg')
-
- imgDir = os.path.join(self.bookDir,'img')
- imgname = self.id + '_%04d.svg' % self.svgcount
- imgfile = os.path.join(imgDir,imgname)
-
- # get glyph information
- gxList = self.getData('info.glyph.x',0,-1)
- gyList = self.getData('info.glyph.y',0,-1)
- gidList = self.getData('info.glyph.glyphID',0,-1)
-
- gids = []
- maxws = []
- maxhs = []
- xs = []
- ys = []
- gdefs = []
-
- # get path defintions, positions, dimensions for each glyph
- # that makes up the image, and find min x and min y to reposition origin
- minx = -1
- miny = -1
- for j in glyphList:
- gid = gidList[j]
- gids.append(gid)
-
- xs.append(gxList[j])
- if minx == -1: minx = gxList[j]
- else : minx = min(minx, gxList[j])
-
- ys.append(gyList[j])
- if miny == -1: miny = gyList[j]
- else : miny = min(miny, gyList[j])
-
- path = self.getGlyph(gid)
- gdefs.append(path)
-
- maxws.append(extract(path,'width='))
- maxhs.append(extract(path,'height='))
-
-
- # change the origin to minx, miny and calc max height and width
- maxw = maxws[0] + xs[0] - minx
- maxh = maxhs[0] + ys[0] - miny
- for j in xrange(0, len(xs)):
- xs[j] = xs[j] - minx
- ys[j] = ys[j] - miny
- maxw = max( maxw, (maxws[j] + xs[j]) )
- maxh = max( maxh, (maxhs[j] + ys[j]) )
-
- # open the image file for output
- ifile = open(imgfile,'w')
- ifile.write('<?xml version="1.0" standalone="no"?>\n')
- ifile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
- ifile.write('<svg width="%dpx" height="%dpx" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh))
- ifile.write('<defs>\n')
- for j in xrange(0,len(gdefs)):
- ifile.write(gdefs[j])
- ifile.write('</defs>\n')
- for j in xrange(0,len(gids)):
- ifile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (gids[j], xs[j], ys[j]))
- ifile.write('</svg>')
- ifile.close()
-
- return 0
-
-
-
- # return tag at line pos in document
- def lineinDoc(self, pos) :
- if (pos >= 0) and (pos < self.docSize) :
- item = self.docList[pos]
- if item.find('=') >= 0:
- (name, argres) = item.split('=',1)
- else :
- name = item
- argres = ''
- return name, argres
-
-
- # find tag in doc if within pos to end inclusive
- def findinDoc(self, tagpath, pos, end) :
- result = None
- if end == -1 :
- end = self.docSize
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# erdr2pml.py
+# Copyright © 2008 The Dark Reverser
+#
+# Modified 2008–2012 by some_updates, DiapDealer and Apprentice Alf
+
+# This is a python script. You need a Python interpreter to run it.
+# For example, ActiveState Python, which exists for windows.
+# Changelog
+#
+# Based on ereader2html version 0.08 plus some later small fixes
+#
+# 0.01 - Initial version
+# 0.02 - Support more eReader files. Support bold text and links. Fix PML decoder parsing bug.
+# 0.03 - Fix incorrect variable usage at one place.
+# 0.03b - enhancement by DeBockle (version 259 support)
+# Custom version 0.03 - no change to eReader support, only usability changes
+# - start of pep-8 indentation (spaces not tab), fix trailing blanks
+# - version variable, only one place to change
+# - added main routine, now callable as a library/module,
+# means tools can add optional support for ereader2html
+# - outdir is no longer a mandatory parameter (defaults based on input name if missing)
+# - time taken output to stdout
+# - Psyco support - reduces runtime by a factor of (over) 3!
+# E.g. (~600Kb file) 90 secs down to 24 secs
+# - newstyle classes
+# - changed map call to list comprehension
+# may not work with python 2.3
+# without Psyco this reduces runtime to 90%
+# E.g. 90 secs down to 77 secs
+# Psyco with map calls takes longer, do not run with map in Psyco JIT!
+# - izip calls used instead of zip (if available), further reduction
+# in run time (factor of 4.5).
+# E.g. (~600Kb file) 90 secs down to 20 secs
+# - Python 2.6+ support, avoid DeprecationWarning with sha/sha1
+# 0.04 - Footnote support, PML output, correct charset in html, support more PML tags
+# - Feature change, dump out PML file
+# - Added supprt for footnote tags. NOTE footnote ids appear to be bad (not usable)
+# in some pdb files :-( due to the same id being used multiple times
+# - Added correct charset encoding (pml is based on cp1252)
+# - Added logging support.
+# 0.05 - Improved type 272 support for sidebars, links, chapters, metainfo, etc
+# 0.06 - Merge of 0.04 and 0.05. Improved HTML output
+# Placed images in subfolder, so that it's possible to just
+# drop the book.pml file onto DropBook to make an unencrypted
+# copy of the eReader file.
+# Using that with Calibre works a lot better than the HTML
+# conversion in this code.
+# 0.07 - Further Improved type 272 support for sidebars with all earlier fixes
+# 0.08 - fixed typos, removed extraneous things
+# 0.09 - fixed typos in first_pages to first_page to again support older formats
+# 0.10 - minor cleanups
+# 0.11 - fixups for using correct xml for footnotes and sidebars for use with Dropbook
+# 0.12 - Fix added to prevent lowercasing of image names when the pml code itself uses a different case in the link name.
+# 0.13 - change to unbuffered stdout for use with gui front ends
+# 0.14 - contributed enhancement to support --make-pmlz switch
+# 0.15 - enabled high-ascii to pml character encoding. DropBook now works on Mac.
+# 0.16 - convert to use openssl DES (very very fast) or pure python DES if openssl's libcrypto is not available
+# 0.17 - added support for pycrypto's DES as well
+# 0.18 - on Windows try PyCrypto first and OpenSSL next
+# 0.19 - Modify the interface to allow use of import
+# 0.20 - modify to allow use inside new interface for calibre plugins
+# 0.21 - Support eReader (drm) version 11.
+# - Don't reject dictionary format.
+# - Ignore sidebars for dictionaries (different format?)
+# 0.22 - Unicode and plugin support, different image folders for PMLZ and source
+# 0.23 - moved unicode_argv call inside main for Windows DeDRM compatibility
+
+__version__='0.23'
+
+import sys, re
+import struct, binascii, getopt, zlib, os, os.path, urllib, tempfile, traceback
+
+if 'calibre' in sys.modules:
+ inCalibre = True
+else:
+ inCalibre = False
+
+# Wrap a stream so that output gets flushed immediately
+# and also make sure that any unicode strings get
+# encoded using "replace" before writing them.
+class SafeUnbuffered:
+ def __init__(self, stream):
+ self.stream = stream
+ self.encoding = stream.encoding
+ if self.encoding == None:
+ self.encoding = "utf-8"
+ def write(self, data):
+ if isinstance(data,unicode):
+ data = data.encode(self.encoding,"replace")
+ self.stream.write(data)
+ self.stream.flush()
+ def __getattr__(self, attr):
+ return getattr(self.stream, attr)
+
+iswindows = sys.platform.startswith('win')
+isosx = sys.platform.startswith('darwin')
+
+def unicode_argv():
+ if iswindows:
+ # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
+ # strings.
+
+ # Versions 2.x of Python don't support Unicode in sys.argv on
+ # Windows, with the underlying Windows API instead replacing multi-byte
+ # characters with '?'.
+
+
+ from ctypes import POINTER, byref, cdll, c_int, windll
+ from ctypes.wintypes import LPCWSTR, LPWSTR
+
+ GetCommandLineW = cdll.kernel32.GetCommandLineW
+ GetCommandLineW.argtypes = []
+ GetCommandLineW.restype = LPCWSTR
+
+ CommandLineToArgvW = windll.shell32.CommandLineToArgvW
+ CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
+ CommandLineToArgvW.restype = POINTER(LPWSTR)
+
+ cmd = GetCommandLineW()
+ argc = c_int(0)
+ argv = CommandLineToArgvW(cmd, byref(argc))
+ if argc.value > 0:
+ # Remove Python executable and commands if present
+ start = argc.value - len(sys.argv)
+ return [argv[i] for i in
+ xrange(start, argc.value)]
+ # if we don't have any arguments at all, just pass back script name
+ # this should never happen
+ return [u"mobidedrm.py"]
+ else:
+ argvencoding = sys.stdin.encoding
+ if argvencoding == None:
+ argvencoding = "utf-8"
+ return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
+
+Des = None
+if iswindows:
+ # first try with pycrypto
+ if inCalibre:
+ from calibre_plugins.dedrm import pycrypto_des
+ else:
+ import pycrypto_des
+ Des = pycrypto_des.load_pycrypto()
+ if Des == None:
+ # they try with openssl
+ if inCalibre:
+ from calibre_plugins.dedrm import openssl_des
else:
- end = min(self.docSize, end)
- foundat = -1
- for j in xrange(pos, end):
- item = self.docList[j]
- if item.find('=') >= 0:
- (name, argres) = item.split('=',1)
- else :
- name = item
- argres = ''
- if name.endswith(tagpath) :
- result = argres
- foundat = j
- break
- return foundat, result
-
-
- # return list of start positions for the tagpath
- def posinDoc(self, tagpath):
- startpos = []
- pos = 0
- res = ""
- while res != None :
- (foundpos, res) = self.findinDoc(tagpath, pos, -1)
- if res != None :
- startpos.append(foundpos)
- pos = foundpos + 1
- return startpos
-
-
- # returns a vector of integers for the tagpath
- def getData(self, tagpath, pos, end):
- argres=[]
- (foundat, argt) = self.findinDoc(tagpath, pos, end)
- if (argt != None) and (len(argt) > 0) :
- argList = argt.split('|')
- argres = [ int(strval) for strval in argList]
- return argres
-
-
- # get the class
- def getClass(self, pclass):
- nclass = pclass
-
- # class names are an issue given topaz may start them with numerals (not allowed),
- # use a mix of cases (which cause some browsers problems), and actually
- # attach numbers after "_reclustered*" to the end to deal classeses that inherit
- # from a base class (but then not actually provide all of these _reclustereed
- # classes in the stylesheet!
-
- # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
- # that exists in the stylesheet first, and then adding this specific class
- # after
-
- # also some class names have spaces in them so need to convert to dashes
- if nclass != None :
- nclass = nclass.replace(' ','-')
- classres = ''
- nclass = nclass.lower()
- nclass = 'cl-' + nclass
- baseclass = ''
- # graphic is the base class for captions
- if nclass.find('cl-cap-') >=0 :
- classres = 'graphic' + ' '
- else :
- # strip to find baseclass
- p = nclass.find('_')
- if p > 0 :
- baseclass = nclass[0:p]
- if baseclass in self.classList:
- classres += baseclass + ' '
- classres += nclass
- nclass = classres
- return nclass
-
-
- # develop a sorted description of the starting positions of
- # groups and regions on the page, as well as the page type
- def PageDescription(self):
-
- def compare(x, y):
- (xtype, xval) = x
- (ytype, yval) = y
- if xval > yval:
- return 1
- if xval == yval:
- return 0
- return -1
-
- result = []
- (pos, pagetype) = self.findinDoc('page.type',0,-1)
-
- groupList = self.posinDoc('page.group')
- groupregionList = self.posinDoc('page.group.region')
- pageregionList = self.posinDoc('page.region')
- # integrate into one list
- for j in groupList:
- result.append(('grpbeg',j))
- for j in groupregionList:
- result.append(('gregion',j))
- for j in pageregionList:
- result.append(('pregion',j))
- result.sort(compare)
-
- # insert group end and page end indicators
- inGroup = False
- j = 0
- while True:
- if j == len(result): break
- rtype = result[j][0]
- rval = result[j][1]
- if not inGroup and (rtype == 'grpbeg') :
- inGroup = True
- j = j + 1
- elif inGroup and (rtype in ('grpbeg', 'pregion')):
- result.insert(j,('grpend',rval))
- inGroup = False
+ import openssl_des
+ Des = openssl_des.load_libcrypto()
+else:
+ # first try with openssl
+ if inCalibre:
+ from calibre_plugins.dedrm import openssl_des
+ else:
+ import openssl_des
+ Des = openssl_des.load_libcrypto()
+ if Des == None:
+ # then try with pycrypto
+ if inCalibre:
+ from calibre_plugins.dedrm import pycrypto_des
+ else:
+ import pycrypto_des
+ Des = pycrypto_des.load_pycrypto()
+
+# if that did not work then use pure python implementation
+# of DES and try to speed it up with Psycho
+if Des == None:
+ if inCalibre:
+ from calibre_plugins.dedrm import python_des
+ else:
+ import python_des
+ Des = python_des.Des
+ # Import Psyco if available
+ try:
+ # http://psyco.sourceforge.net
+ import psyco
+ psyco.full()
+ except ImportError:
+ pass
+
+try:
+ from hashlib import sha1
+except ImportError:
+ # older Python release
+ import sha
+ sha1 = lambda s: sha.new(s)
+
+import cgi
+import logging
+
+logging.basicConfig()
+#logging.basicConfig(level=logging.DEBUG)
+
+
+class Sectionizer(object):
+ bkType = "Book"
+
+ def __init__(self, filename, ident):
+ self.contents = file(filename, 'rb').read()
+ self.header = self.contents[0:72]
+ self.num_sections, = struct.unpack('>H', self.contents[76:78])
+ # Dictionary or normal content (TODO: Not hard-coded)
+ if self.header[0x3C:0x3C+8] != ident:
+ if self.header[0x3C:0x3C+8] == "PDctPPrs":
+ self.bkType = "Dict"
else:
- j = j + 1
- if inGroup:
- result.append(('grpend',-1))
- result.append(('pageend', -1))
- return pagetype, result
-
-
-
- # build a description of the paragraph
- def getParaDescription(self, start, end, regtype):
-
- result = []
-
- # paragraph
- (pos, pclass) = self.findinDoc('paragraph.class',start,end)
-
- pclass = self.getClass(pclass)
-
- # if paragraph uses extratokens (extra glyphs) then make it fixed
- (pos, extraglyphs) = self.findinDoc('paragraph.extratokens',start,end)
-
- # build up a description of the paragraph in result and return it
- # first check for the basic - all words paragraph
- (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
- (pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
- if (sfirst != None) and (slast != None) :
- first = int(sfirst)
- last = int(slast)
-
- makeImage = (regtype == 'vertical') or (regtype == 'table')
- makeImage = makeImage or (extraglyphs != None)
- if self.fixedimage:
- makeImage = makeImage or (regtype == 'fixed')
-
- if (pclass != None):
- makeImage = makeImage or (pclass.find('.inverted') >= 0)
- if self.fixedimage :
- makeImage = makeImage or (pclass.find('cl-f-') >= 0)
-
- # before creating an image make sure glyph info exists
- gidList = self.getData('info.glyph.glyphID',0,-1)
-
- makeImage = makeImage & (len(gidList) > 0)
-
- if not makeImage :
- # standard all word paragraph
- for wordnum in xrange(first, last):
- result.append(('ocr', wordnum))
- return pclass, result
-
- # convert paragraph to svg image
- # translate first and last word into first and last glyphs
- # and generate inline image and include it
- glyphList = []
- firstglyphList = self.getData('word.firstGlyph',0,-1)
- gidList = self.getData('info.glyph.glyphID',0,-1)
- firstGlyph = firstglyphList[first]
- if last < len(firstglyphList):
- lastGlyph = firstglyphList[last]
- else :
- lastGlyph = len(gidList)
-
- # handle case of white sapce paragraphs with no actual glyphs in them
- # by reverting to text based paragraph
- if firstGlyph >= lastGlyph:
- # revert to standard text based paragraph
- for wordnum in xrange(first, last):
- result.append(('ocr', wordnum))
- return pclass, result
-
- for glyphnum in xrange(firstGlyph, lastGlyph):
- glyphList.append(glyphnum)
- # include any extratokens if they exist
- (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end)
- (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end)
- if (sfg != None) and (slg != None):
- for glyphnum in xrange(int(sfg), int(slg)):
- glyphList.append(glyphnum)
- num = self.svgcount
- self.glyphs_to_image(glyphList)
- self.svgcount += 1
- result.append(('svg', num))
- return pclass, result
-
- # this type of paragraph may be made up of multiple spans, inline
- # word monograms (images), and words with semantic meaning,
- # plus glyphs used to form starting letter of first word
-
- # need to parse this type line by line
- line = start + 1
- word_class = ''
-
- # if end is -1 then we must search to end of document
- if end == -1 :
- end = self.docSize
-
- # seems some xml has last* coming before first* so we have to
- # handle any order
- sp_first = -1
- sp_last = -1
-
- gl_first = -1
- gl_last = -1
-
- ws_first = -1
- ws_last = -1
-
- word_class = ''
-
- word_semantic_type = ''
-
- while (line < end) :
-
- (name, argres) = self.lineinDoc(line)
-
- if name.endswith('span.firstWord') :
- sp_first = int(argres)
-
- elif name.endswith('span.lastWord') :
- sp_last = int(argres)
-
- elif name.endswith('word.firstGlyph') :
- gl_first = int(argres)
-
- elif name.endswith('word.lastGlyph') :
- gl_last = int(argres)
-
- elif name.endswith('word_semantic.firstWord'):
- ws_first = int(argres)
-
- elif name.endswith('word_semantic.lastWord'):
- ws_last = int(argres)
-
- elif name.endswith('word.class'):
- # we only handle spaceafter word class
- try:
- (cname, space) = argres.split('-',1)
- if space == '' : space = '0'
- if (cname == 'spaceafter') and (int(space) > 0) :
- word_class = 'sa'
- except:
- pass
-
- elif name.endswith('word.img.src'):
- result.append(('img' + word_class, int(argres)))
- word_class = ''
-
- elif name.endswith('region.img.src'):
- result.append(('img' + word_class, int(argres)))
-
- if (sp_first != -1) and (sp_last != -1):
- for wordnum in xrange(sp_first, sp_last):
- result.append(('ocr', wordnum))
- sp_first = -1
- sp_last = -1
-
- if (gl_first != -1) and (gl_last != -1):
- glyphList = []
- for glyphnum in xrange(gl_first, gl_last):
- glyphList.append(glyphnum)
- num = self.svgcount
- self.glyphs_to_image(glyphList)
- self.svgcount += 1
- result.append(('svg', num))
- gl_first = -1
- gl_last = -1
-
- if (ws_first != -1) and (ws_last != -1):
- for wordnum in xrange(ws_first, ws_last):
- result.append(('ocr', wordnum))
- ws_first = -1
- ws_last = -1
-
- line += 1
-
- return pclass, result
-
-
- def buildParagraph(self, pclass, pdesc, type, regtype) :
- parares = ''
- sep =''
-
- classres = ''
- if pclass :
- classres = ' class="' + pclass + '"'
-
- br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')
-
- handle_links = len(self.link_id) > 0
-
- if (type == 'full') or (type == 'begin') :
- parares += '<p' + classres + '>'
-
- if (type == 'end'):
- parares += ' '
-
- lstart = len(parares)
-
- cnt = len(pdesc)
-
- for j in xrange( 0, cnt) :
-
- (wtype, num) = pdesc[j]
-
- if wtype == 'ocr' :
- word = self.ocrtext[num]
- sep = ' '
-
- if handle_links:
- link = self.link_id[num]
- if (link > 0):
- linktype = self.link_type[link-1]
- title = self.link_title[link-1]
- if (title == "") or (parares.rfind(title) < 0):
- title=parares[lstart:]
- if linktype == 'external' :
- linkhref = self.link_href[link-1]
- linkhtml = '<a href="%s">' % linkhref
- else :
- if len(self.link_page) >= link :
- ptarget = self.link_page[link-1] - 1
- linkhtml = '<a href="#page%04d">' % ptarget
- else :
- # just link to the current page
- linkhtml = '<a href="#' + self.id + '">'
- linkhtml += title + '</a>'
- pos = parares.rfind(title)
- if pos >= 0:
- parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
- else :
- parares += linkhtml
- lstart = len(parares)
- if word == '_link_' : word = ''
- elif (link < 0) :
- if word == '_link_' : word = ''
-
- if word == '_lb_':
- if ((num-1) in self.dehyphen_rootid ) or handle_links:
- word = ''
- sep = ''
- elif br_lb :
- word = '<br />\n'
- sep = ''
- else :
- word = '\n'
- sep = ''
-
- if num in self.dehyphen_rootid :
- word = word[0:-1]
- sep = ''
-
- parares += word + sep
-
- elif wtype == 'img' :
- sep = ''
- parares += '<img src="img/img%04d.jpg" alt="" />' % num
- parares += sep
-
- elif wtype == 'imgsa' :
- sep = ' '
- parares += '<img src="img/img%04d.jpg" alt="" />' % num
- parares += sep
-
- elif wtype == 'svg' :
- sep = ''
- parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num
- parares += sep
-
- if len(sep) > 0 : parares = parares[0:-1]
- if (type == 'full') or (type == 'end') :
- parares += '</p>'
- return parares
-
-
- def buildTOCEntry(self, pdesc) :
- parares = ''
- sep =''
- tocentry = ''
- handle_links = len(self.link_id) > 0
-
- lstart = 0
-
- cnt = len(pdesc)
- for j in xrange( 0, cnt) :
-
- (wtype, num) = pdesc[j]
-
- if wtype == 'ocr' :
- word = self.ocrtext[num]
- sep = ' '
-
- if handle_links:
- link = self.link_id[num]
- if (link > 0):
- linktype = self.link_type[link-1]
- title = self.link_title[link-1]
- title = title.rstrip('. ')
- alt_title = parares[lstart:]
- alt_title = alt_title.strip()
- # now strip off the actual printed page number
- alt_title = alt_title.rstrip('01234567890ivxldIVXLD-.')
- alt_title = alt_title.rstrip('. ')
- # skip over any external links - can't have them in a books toc
- if linktype == 'external' :
- title = ''
- alt_title = ''
- linkpage = ''
- else :
- if len(self.link_page) >= link :
- ptarget = self.link_page[link-1] - 1
- linkpage = '%04d' % ptarget
- else :
- # just link to the current page
- linkpage = self.id[4:]
- if len(alt_title) >= len(title):
- title = alt_title
- if title != '' and linkpage != '':
- tocentry += title + '|' + linkpage + '\n'
- lstart = len(parares)
- if word == '_link_' : word = ''
- elif (link < 0) :
- if word == '_link_' : word = ''
-
- if word == '_lb_':
- word = ''
- sep = ''
-
- if num in self.dehyphen_rootid :
- word = word[0:-1]
- sep = ''
-
- parares += word + sep
-
- else :
- continue
-
- return tocentry
-
-
-
-
- # walk the document tree collecting the information needed
- # to build an html page using the ocrText
-
- def process(self):
-
- tocinfo = ''
- hlst = []
-
- # get the ocr text
- (pos, argres) = self.findinDoc('info.word.ocrText',0,-1)
- if argres : self.ocrtext = argres.split('|')
-
- # get information to dehyphenate the text
- self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1)
-
- # determine if first paragraph is continued from previous page
- (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
- first_para_continued = (self.parastems_stemid != None)
-
- # determine if last paragraph is continued onto the next page
- (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1)
- last_para_continued = (self.paracont_stemid != None)
-
- # collect link ids
- self.link_id = self.getData('info.word.link_id',0,-1)
-
- # collect link destination page numbers
- self.link_page = self.getData('info.links.page',0,-1)
-
- # collect link types (container versus external)
- (pos, argres) = self.findinDoc('info.links.type',0,-1)
- if argres : self.link_type = argres.split('|')
-
- # collect link destinations
- (pos, argres) = self.findinDoc('info.links.href',0,-1)
- if argres : self.link_href = argres.split('|')
-
- # collect link titles
- (pos, argres) = self.findinDoc('info.links.title',0,-1)
- if argres :
- self.link_title = argres.split('|')
+ raise ValueError('Invalid file format')
+ self.sections = []
+ for i in xrange(self.num_sections):
+ offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.contents[78+i*8:78+i*8+8])
+ flags, val = a1, a2<<16|a3<<8|a4
+ self.sections.append( (offset, flags, val) )
+ def loadSection(self, section):
+ if section + 1 == self.num_sections:
+ end_off = len(self.contents)
else:
- self.link_title.append('')
-
- # get a descriptions of the starting points of the regions
- # and groups on the page
- (pagetype, pageDesc) = self.PageDescription()
- regcnt = len(pageDesc) - 1
-
- anchorSet = False
- breakSet = False
- inGroup = False
-
- # process each region on the page and convert what you can to html
-
- for j in xrange(regcnt):
-
- (etype, start) = pageDesc[j]
- (ntype, end) = pageDesc[j+1]
+ end_off = self.sections[section + 1][0]
+ off = self.sections[section][0]
+ return self.contents[off:end_off]
+
+# cleanup unicode filenames
+# borrowed from calibre from calibre/src/calibre/__init__.py
+# added in removal of control (<32) chars
+# and removal of . at start and end
+# and with some (heavily edited) code from Paul Durrant's kindlenamer.py
+def sanitizeFileName(name):
+ # substitute filename unfriendly characters
+ name = name.replace(u"<",u"[").replace(u">",u"]").replace(u" : ",u" – ").replace(u": ",u" – ").replace(u":",u"—").replace(u"/",u"_").replace(u"\\",u"_").replace(u"|",u"_").replace(u"\"",u"\'")
+ # delete control characters
+ name = u"".join(char for char in name if ord(char)>=32)
+ # white space to single space, delete leading and trailing while space
+ name = re.sub(ur"\s", u" ", name).strip()
+ # remove leading dots
+ while len(name)>0 and name[0] == u".":
+ name = name[1:]
+ # remove trailing dots (Windows doesn't like them)
+ if name.endswith(u'.'):
+ name = name[:-1]
+ return name
+
+def fixKey(key):
+ def fixByte(b):
+ return b ^ ((b ^ (b<<1) ^ (b<<2) ^ (b<<3) ^ (b<<4) ^ (b<<5) ^ (b<<6) ^ (b<<7) ^ 0x80) & 0x80)
+ return "".join([chr(fixByte(ord(a))) for a in key])
+
+def deXOR(text, sp, table):
+ r=''
+ j = sp
+ for i in xrange(len(text)):
+ r += chr(ord(table[j]) ^ ord(text[i]))
+ j = j + 1
+ if j == len(table):
+ j = 0
+ return r
+
+class EreaderProcessor(object):
+ def __init__(self, sect, user_key):
+ self.section_reader = sect.loadSection
+ data = self.section_reader(0)
+ version, = struct.unpack('>H', data[0:2])
+ self.version = version
+ logging.info('eReader file format version %s', version)
+ if version != 272 and version != 260 and version != 259:
+ raise ValueError('incorrect eReader version %d (error 1)' % version)
+ data = self.section_reader(1)
+ self.data = data
+ des = Des(fixKey(data[0:8]))
+ cookie_shuf, cookie_size = struct.unpack('>LL', des.decrypt(data[-8:]))
+ if cookie_shuf < 3 or cookie_shuf > 0x14 or cookie_size < 0xf0 or cookie_size > 0x200:
+ raise ValueError('incorrect eReader version (error 2)')
+ input = des.decrypt(data[-cookie_size:])
+ def unshuff(data, shuf):
+ r = [''] * len(data)
+ j = 0
+ for i in xrange(len(data)):
+ j = (j + shuf) % len(data)
+ r[j] = data[i]
+ assert len("".join(r)) == len(data)
+ return "".join(r)
+ r = unshuff(input[0:-8], cookie_shuf)
+
+ drm_sub_version = struct.unpack('>H', r[0:2])[0]
+ self.num_text_pages = struct.unpack('>H', r[2:4])[0] - 1
+ self.num_image_pages = struct.unpack('>H', r[26:26+2])[0]
+ self.first_image_page = struct.unpack('>H', r[24:24+2])[0]
+ # Default values
+ self.num_footnote_pages = 0
+ self.num_sidebar_pages = 0
+ self.first_footnote_page = -1
+ self.first_sidebar_page = -1
+ if self.version == 272:
+ self.num_footnote_pages = struct.unpack('>H', r[46:46+2])[0]
+ self.first_footnote_page = struct.unpack('>H', r[44:44+2])[0]
+ if (sect.bkType == "Book"):
+ self.num_sidebar_pages = struct.unpack('>H', r[38:38+2])[0]
+ self.first_sidebar_page = struct.unpack('>H', r[36:36+2])[0]
+ # self.num_bookinfo_pages = struct.unpack('>H', r[34:34+2])[0]
+ # self.first_bookinfo_page = struct.unpack('>H', r[32:32+2])[0]
+ # self.num_chapter_pages = struct.unpack('>H', r[22:22+2])[0]
+ # self.first_chapter_page = struct.unpack('>H', r[20:20+2])[0]
+ # self.num_link_pages = struct.unpack('>H', r[30:30+2])[0]
+ # self.first_link_page = struct.unpack('>H', r[28:28+2])[0]
+ # self.num_xtextsize_pages = struct.unpack('>H', r[54:54+2])[0]
+ # self.first_xtextsize_page = struct.unpack('>H', r[52:52+2])[0]
+
+ # **before** data record 1 was decrypted and unshuffled, it contained data
+ # to create an XOR table and which is used to fix footnote record 0, link records, chapter records, etc
+ self.xortable_offset = struct.unpack('>H', r[40:40+2])[0]
+ self.xortable_size = struct.unpack('>H', r[42:42+2])[0]
+ self.xortable = self.data[self.xortable_offset:self.xortable_offset + self.xortable_size]
+ else:
+ # Nothing needs to be done
+ pass
+ # self.num_bookinfo_pages = 0
+ # self.num_chapter_pages = 0
+ # self.num_link_pages = 0
+ # self.num_xtextsize_pages = 0
+ # self.first_bookinfo_page = -1
+ # self.first_chapter_page = -1
+ # self.first_link_page = -1
+ # self.first_xtextsize_page = -1
+
+ logging.debug('self.num_text_pages %d', self.num_text_pages)
+ logging.debug('self.num_footnote_pages %d, self.first_footnote_page %d', self.num_footnote_pages , self.first_footnote_page)
+ logging.debug('self.num_sidebar_pages %d, self.first_sidebar_page %d', self.num_sidebar_pages , self.first_sidebar_page)
+ self.flags = struct.unpack('>L', r[4:8])[0]
+ reqd_flags = (1<<9) | (1<<7) | (1<<10)
+ if (self.flags & reqd_flags) != reqd_flags:
+ print "Flags: 0x%X" % self.flags
+ raise ValueError('incompatible eReader file')
+ des = Des(fixKey(user_key))
+ if version == 259:
+ if drm_sub_version != 7:
+ raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
+ encrypted_key_sha = r[44:44+20]
+ encrypted_key = r[64:64+8]
+ elif version == 260:
+ if drm_sub_version != 13 and drm_sub_version != 11:
+ raise ValueError('incorrect eReader version %d (error 3)' % drm_sub_version)
+ if drm_sub_version == 13:
+ encrypted_key = r[44:44+8]
+ encrypted_key_sha = r[52:52+20]
+ else:
+ encrypted_key = r[64:64+8]
+ encrypted_key_sha = r[44:44+20]
+ elif version == 272:
+ encrypted_key = r[172:172+8]
+ encrypted_key_sha = r[56:56+20]
+ self.content_key = des.decrypt(encrypted_key)
+ if sha1(self.content_key).digest() != encrypted_key_sha:
+ raise ValueError('Incorrect Name and/or Credit Card')
+
+ def getNumImages(self):
+ return self.num_image_pages
+
+ def getImage(self, i):
+ sect = self.section_reader(self.first_image_page + i)
+ name = sect[4:4+32].strip('\0')
+ data = sect[62:]
+ return sanitizeFileName(unicode(name,'windows-1252')), data
+
+
+ # def getChapterNamePMLOffsetData(self):
+ # cv = ''
+ # if self.num_chapter_pages > 0:
+ # for i in xrange(self.num_chapter_pages):
+ # chaps = self.section_reader(self.first_chapter_page + i)
+ # j = i % self.xortable_size
+ # offname = deXOR(chaps, j, self.xortable)
+ # offset = struct.unpack('>L', offname[0:4])[0]
+ # name = offname[4:].strip('\0')
+ # cv += '%d|%s\n' % (offset, name)
+ # return cv
+
+ # def getLinkNamePMLOffsetData(self):
+ # lv = ''
+ # if self.num_link_pages > 0:
+ # for i in xrange(self.num_link_pages):
+ # links = self.section_reader(self.first_link_page + i)
+ # j = i % self.xortable_size
+ # offname = deXOR(links, j, self.xortable)
+ # offset = struct.unpack('>L', offname[0:4])[0]
+ # name = offname[4:].strip('\0')
+ # lv += '%d|%s\n' % (offset, name)
+ # return lv
+
+ # def getExpandedTextSizesData(self):
+ # ts = ''
+ # if self.num_xtextsize_pages > 0:
+ # tsize = deXOR(self.section_reader(self.first_xtextsize_page), 0, self.xortable)
+ # for i in xrange(self.num_text_pages):
+ # xsize = struct.unpack('>H', tsize[0:2])[0]
+ # ts += "%d\n" % xsize
+ # tsize = tsize[2:]
+ # return ts
+
+ # def getBookInfo(self):
+ # bkinfo = ''
+ # if self.num_bookinfo_pages > 0:
+ # info = self.section_reader(self.first_bookinfo_page)
+ # bkinfo = deXOR(info, 0, self.xortable)
+ # bkinfo = bkinfo.replace('\0','|')
+ # bkinfo += '\n'
+ # return bkinfo
+
+ def getText(self):
+ des = Des(fixKey(self.content_key))
+ r = ''
+ for i in xrange(self.num_text_pages):
+ logging.debug('get page %d', i)
+ r += zlib.decompress(des.decrypt(self.section_reader(1 + i)))
+
+ # now handle footnotes pages
+ if self.num_footnote_pages > 0:
+ r += '\n'
+ # the record 0 of the footnote section must pass through the Xor Table to make it useful
+ sect = self.section_reader(self.first_footnote_page)
+ fnote_ids = deXOR(sect, 0, self.xortable)
+ # the remaining records of the footnote sections need to be decoded with the content_key and zlib inflated
+ des = Des(fixKey(self.content_key))
+ for i in xrange(1,self.num_footnote_pages):
+ logging.debug('get footnotepage %d', i)
+ id_len = ord(fnote_ids[2])
+ id = fnote_ids[3:3+id_len]
+ fmarker = '<footnote id="%s">\n' % id
+ fmarker += zlib.decompress(des.decrypt(self.section_reader(self.first_footnote_page + i)))
+ fmarker += '\n</footnote>\n'
+ r += fmarker
+ fnote_ids = fnote_ids[id_len+4:]
+
+ # TODO: Handle dictionary index (?) pages - which are also marked as
+ # sidebar_pages (?). For now dictionary sidebars are ignored
+ # For dictionaries - record 0 is null terminated strings, followed by
+ # blocks of around 62000 bytes and a final block. Not sure of the
+ # encoding
+
+ # now handle sidebar pages
+ if self.num_sidebar_pages > 0:
+ r += '\n'
+ # the record 0 of the sidebar section must pass through the Xor Table to make it useful
+ sect = self.section_reader(self.first_sidebar_page)
+ sbar_ids = deXOR(sect, 0, self.xortable)
+ # the remaining records of the sidebar sections need to be decoded with the content_key and zlib inflated
+ des = Des(fixKey(self.content_key))
+ for i in xrange(1,self.num_sidebar_pages):
+ id_len = ord(sbar_ids[2])
+ id = sbar_ids[3:3+id_len]
+ smarker = '<sidebar id="%s">\n' % id
+ smarker += zlib.decompress(des.decrypt(self.section_reader(self.first_sidebar_page + i)))
+ smarker += '\n</sidebar>\n'
+ r += smarker
+ sbar_ids = sbar_ids[id_len+4:]
+
+ return r
+
+def cleanPML(pml):
+ # Convert special characters to proper PML code. High ASCII start at (\x80, \a128) and go up to (\xff, \a255)
+ pml2 = pml
+ for k in xrange(128,256):
+ badChar = chr(k)
+ pml2 = pml2.replace(badChar, '\\a%03d' % k)
+ return pml2
+
+def decryptBook(infile, outpath, make_pmlz, user_key):
+ bookname = os.path.splitext(os.path.basename(infile))[0]
+ if make_pmlz:
+ # outpath is actually pmlz name
+ pmlzname = outpath
+ outdir = tempfile.mkdtemp()
+ imagedirpath = os.path.join(outdir,u"images")
+ else:
+ pmlzname = None
+ outdir = outpath
+ imagedirpath = os.path.join(outdir,bookname + u"_img")
+
+ try:
+ if not os.path.exists(outdir):
+ os.makedirs(outdir)
+ print u"Decoding File"
+ sect = Sectionizer(infile, 'PNRdPPrs')
+ er = EreaderProcessor(sect, user_key)
+
+ if er.getNumImages() > 0:
+ print u"Extracting images"
+ if not os.path.exists(imagedirpath):
+ os.makedirs(imagedirpath)
+ for i in xrange(er.getNumImages()):
+ name, contents = er.getImage(i)
+ file(os.path.join(imagedirpath, name), 'wb').write(contents)
+
+ print u"Extracting pml"
+ pml_string = er.getText()
+ pmlfilename = bookname + ".pml"
+ file(os.path.join(outdir, pmlfilename),'wb').write(cleanPML(pml_string))
+ if pmlzname is not None:
+ import zipfile
+ import shutil
+ print u"Creating PMLZ file {0}".format(os.path.basename(pmlzname))
+ myZipFile = zipfile.ZipFile(pmlzname,'w',zipfile.ZIP_STORED, False)
+ list = os.listdir(outdir)
+ for filename in list:
+ localname = filename
+ filePath = os.path.join(outdir,filename)
+ if os.path.isfile(filePath):
+ myZipFile.write(filePath, localname)
+ elif os.path.isdir(filePath):
+ imageList = os.listdir(filePath)
+ localimgdir = os.path.basename(filePath)
+ for image in imageList:
+ localname = os.path.join(localimgdir,image)
+ imagePath = os.path.join(filePath,image)
+ if os.path.isfile(imagePath):
+ myZipFile.write(imagePath, localname)
+ myZipFile.close()
+ # remove temporary directory
+ shutil.rmtree(outdir, True)
+ print u"Output is {0}".format(pmlzname)
+ else :
+ print u"Output is in {0}".format(outdir)
+ print "done"
+ except ValueError, e:
+ print u"Error: {0}".format(e)
+ traceback.print_exc()
+ return 1
+ return 0
+
+
+def usage():
+ print u"Converts DRMed eReader books to PML Source"
+ print u"Usage:"
+ print u" erdr2pml [options] infile.pdb [outpath] \"your name\" credit_card_number"
+ print u" "
+ print u"Options: "
+ print u" -h prints this message"
+ print u" -p create PMLZ instead of source folder"
+ print u" --make-pmlz create PMLZ instead of source folder"
+ print u" "
+ print u"Note:"
+ print u" if outpath is ommitted, creates source in 'infile_Source' folder"
+ print u" if outpath is ommitted and pmlz option, creates PMLZ 'infile.pmlz'"
+ print u" if source folder created, images are in infile_img folder"
+ print u" if pmlz file created, images are in images folder"
+ print u" It's enough to enter the last 8 digits of the credit card number"
+ return
+
+def getuser_key(name,cc):
+ newname = "".join(c for c in name.lower() if c >= 'a' and c <= 'z' or c >= '0' and c <= '9')
+ cc = cc.replace(" ","")
+ return struct.pack('>LL', binascii.crc32(newname) & 0xffffffff,binascii.crc32(cc[-8:])& 0xffffffff)
+
+def cli_main():
+ print u"eRdr2Pml v{0}. Copyright © 2009–2012 The Dark Reverser et al.".format(__version__)
+
+ argv=unicode_argv()
+ try:
+ opts, args = getopt.getopt(argv[1:], "hp", ["make-pmlz"])
+ except getopt.GetoptError, err:
+ print err.args[0]
+ usage()
+ return 1
+ make_pmlz = False
+ for o, a in opts:
+ if o == "-h":
+ usage()
+ return 0
+ elif o == "-p":
+ make_pmlz = True
+ elif o == "--make-pmlz":
+ make_pmlz = True
+
+ if len(args)!=3 and len(args)!=4:
+ usage()
+ return 1
+
+ if len(args)==3:
+ infile, name, cc = args
+ if make_pmlz:
+ outpath = os.path.splitext(infile)[0] + u".pmlz"
+ else:
+ outpath = os.path.splitext(infile)[0] + u"_Source"
+ elif len(args)==4:
+ infile, outpath, name, cc = args
+ print getuser_key(name,cc).encode('hex')
- # set anchor for link target on this page
- if not anchorSet and not first_para_continued:
- hlst.append('<div style="visibility: hidden; height: 0; width: 0;" id="')
- hlst.append(self.id + '" title="pagetype_' + pagetype + '"></div>\n')
- anchorSet = True
+ return decryptBook(infile, outpath, make_pmlz, getuser_key(name,cc))
- # handle groups of graphics with text captions
- if (etype == 'grpbeg'):
- (pos, grptype) = self.findinDoc('group.type', start, end)
- if grptype != None:
- if grptype == 'graphic':
- gcstr = ' class="' + grptype + '"'
- hlst.append('<div' + gcstr + '>')
- inGroup = True
- elif (etype == 'grpend'):
- if inGroup:
- hlst.append('</div>\n')
- inGroup = False
+if __name__ == "__main__":
+ sys.stdout=SafeUnbuffered(sys.stdout)
+ sys.stderr=SafeUnbuffered(sys.stderr)
+ sys.exit(cli_main())
- else:
- (pos, regtype) = self.findinDoc('region.type',start,end)
-
- if regtype == 'graphic' :
- (pos, simgsrc) = self.findinDoc('img.src',start,end)
- if simgsrc:
- if inGroup:
- hlst.append('<img src="img/img%04d.jpg" alt="" />' % int(simgsrc))
- else:
- hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
-
- elif regtype == 'chapterheading' :
- (pclass, pdesc) = self.getParaDescription(start,end, regtype)
- if not breakSet:
- hlst.append('<div style="page-break-after: always;">&nbsp;</div>\n')
- breakSet = True
- tag = 'h1'
- if pclass and (len(pclass) >= 7):
- if pclass[3:7] == 'ch1-' : tag = 'h1'
- if pclass[3:7] == 'ch2-' : tag = 'h2'
- if pclass[3:7] == 'ch3-' : tag = 'h3'
- hlst.append('<' + tag + ' class="' + pclass + '">')
- else:
- hlst.append('<' + tag + '>')
- hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
- hlst.append('</' + tag + '>')
-
- elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
- ptype = 'full'
- # check to see if this is a continution from the previous page
- if first_para_continued :
- ptype = 'end'
- first_para_continued = False
- (pclass, pdesc) = self.getParaDescription(start,end, regtype)
- if pclass and (len(pclass) >= 6) and (ptype == 'full'):
- tag = 'p'
- if pclass[3:6] == 'h1-' : tag = 'h4'
- if pclass[3:6] == 'h2-' : tag = 'h5'
- if pclass[3:6] == 'h3-' : tag = 'h6'
- hlst.append('<' + tag + ' class="' + pclass + '">')
- hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
- hlst.append('</' + tag + '>')
- else :
- hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
-
- elif (regtype == 'tocentry') :
- ptype = 'full'
- if first_para_continued :
- ptype = 'end'
- first_para_continued = False
- (pclass, pdesc) = self.getParaDescription(start,end, regtype)
- tocinfo += self.buildTOCEntry(pdesc)
- hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
-
- elif (regtype == 'vertical') or (regtype == 'table') :
- ptype = 'full'
- if inGroup:
- ptype = 'middle'
- if first_para_continued :
- ptype = 'end'
- first_para_continued = False
- (pclass, pdesc) = self.getParaDescription(start, end, regtype)
- hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
-
-
- elif (regtype == 'synth_fcvr.center'):
- (pos, simgsrc) = self.findinDoc('img.src',start,end)
- if simgsrc:
- hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
-
- else :
- print ' Making region type', regtype,
- (pos, temp) = self.findinDoc('paragraph',start,end)
- (pos2, temp) = self.findinDoc('span',start,end)
- if pos != -1 or pos2 != -1:
- print ' a "text" region'
- orig_regtype = regtype
- regtype = 'fixed'
- ptype = 'full'
- # check to see if this is a continution from the previous page
- if first_para_continued :
- ptype = 'end'
- first_para_continued = False
- (pclass, pdesc) = self.getParaDescription(start,end, regtype)
- if not pclass:
- if orig_regtype.endswith('.right') : pclass = 'cl-right'
- elif orig_regtype.endswith('.center') : pclass = 'cl-center'
- elif orig_regtype.endswith('.left') : pclass = 'cl-left'
- elif orig_regtype.endswith('.justify') : pclass = 'cl-justify'
- if pclass and (ptype == 'full') and (len(pclass) >= 6):
- tag = 'p'
- if pclass[3:6] == 'h1-' : tag = 'h4'
- if pclass[3:6] == 'h2-' : tag = 'h5'
- if pclass[3:6] == 'h3-' : tag = 'h6'
- hlst.append('<' + tag + ' class="' + pclass + '">')
- hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
- hlst.append('</' + tag + '>')
- else :
- hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
- else :
- print ' a "graphic" region'
- (pos, simgsrc) = self.findinDoc('img.src',start,end)
- if simgsrc:
- hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
-
-
- htmlpage = "".join(hlst)
- if last_para_continued :
- if htmlpage[-4:] == '</p>':
- htmlpage = htmlpage[0:-4]
- last_para_continued = False
-
- return htmlpage, tocinfo
-
-
-def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage):
- # create a document parser
- dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage)
- htmlpage, tocinfo = dp.process()
- return htmlpage, tocinfo
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2html.py b/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2html.py
index 4dfd6c7..991591b 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2html.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2html.py
@@ -1,63 +1,127 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# For use with Topaz Scripts Version 2.6
import sys
import csv
import os
+import math
import getopt
from struct import pack
from struct import unpack
-class PParser(object):
- def __init__(self, gd, flatxml, meta_array):
- self.gd = gd
- self.flatdoc = flatxml.split('\n')
- self.docSize = len(self.flatdoc)
- self.temp = []
-
- self.ph = -1
- self.pw = -1
- startpos = self.posinDoc('page.h') or self.posinDoc('book.h')
- for p in startpos:
- (name, argres) = self.lineinDoc(p)
- self.ph = max(self.ph, int(argres))
- startpos = self.posinDoc('page.w') or self.posinDoc('book.w')
- for p in startpos:
- (name, argres) = self.lineinDoc(p)
- self.pw = max(self.pw, int(argres))
-
- if self.ph <= 0:
- self.ph = int(meta_array.get('pageHeight', '11000'))
- if self.pw <= 0:
- self.pw = int(meta_array.get('pageWidth', '8500'))
-
- res = []
- startpos = self.posinDoc('info.glyph.x')
- for p in startpos:
- argres = self.getDataatPos('info.glyph.x', p)
- res.extend(argres)
- self.gx = res
-
- res = []
- startpos = self.posinDoc('info.glyph.y')
- for p in startpos:
- argres = self.getDataatPos('info.glyph.y', p)
- res.extend(argres)
- self.gy = res
-
- res = []
- startpos = self.posinDoc('info.glyph.glyphID')
- for p in startpos:
- argres = self.getDataatPos('info.glyph.glyphID', p)
- res.extend(argres)
- self.gid = res
+class DocParser(object):
+ def __init__(self, flatxml, classlst, fileid, bookDir, gdict, fixedimage):
+ self.id = os.path.basename(fileid).replace('.dat','')
+ self.svgcount = 0
+ self.docList = flatxml.split('\n')
+ self.docSize = len(self.docList)
+ self.classList = {}
+ self.bookDir = bookDir
+ self.gdict = gdict
+ tmpList = classlst.split('\n')
+ for pclass in tmpList:
+ if pclass != '':
+ # remove the leading period from the css name
+ cname = pclass[1:]
+ self.classList[cname] = True
+ self.fixedimage = fixedimage
+ self.ocrtext = []
+ self.link_id = []
+ self.link_title = []
+ self.link_page = []
+ self.link_href = []
+ self.link_type = []
+ self.dehyphen_rootid = []
+ self.paracont_stemid = []
+ self.parastems_stemid = []
+
+
+ def getGlyph(self, gid):
+ result = ''
+ id='id="gl%d"' % gid
+ return self.gdict.lookup(id)
+
+ def glyphs_to_image(self, glyphList):
+
+ def extract(path, key):
+ b = path.find(key) + len(key)
+ e = path.find(' ',b)
+ return int(path[b:e])
+
+ svgDir = os.path.join(self.bookDir,'svg')
+
+ imgDir = os.path.join(self.bookDir,'img')
+ imgname = self.id + '_%04d.svg' % self.svgcount
+ imgfile = os.path.join(imgDir,imgname)
+
+ # get glyph information
+ gxList = self.getData('info.glyph.x',0,-1)
+ gyList = self.getData('info.glyph.y',0,-1)
+ gidList = self.getData('info.glyph.glyphID',0,-1)
+
+ gids = []
+ maxws = []
+ maxhs = []
+ xs = []
+ ys = []
+ gdefs = []
+
+ # get path defintions, positions, dimensions for each glyph
+ # that makes up the image, and find min x and min y to reposition origin
+ minx = -1
+ miny = -1
+ for j in glyphList:
+ gid = gidList[j]
+ gids.append(gid)
+
+ xs.append(gxList[j])
+ if minx == -1: minx = gxList[j]
+ else : minx = min(minx, gxList[j])
+
+ ys.append(gyList[j])
+ if miny == -1: miny = gyList[j]
+ else : miny = min(miny, gyList[j])
+
+ path = self.getGlyph(gid)
+ gdefs.append(path)
+
+ maxws.append(extract(path,'width='))
+ maxhs.append(extract(path,'height='))
+
+
+ # change the origin to minx, miny and calc max height and width
+ maxw = maxws[0] + xs[0] - minx
+ maxh = maxhs[0] + ys[0] - miny
+ for j in xrange(0, len(xs)):
+ xs[j] = xs[j] - minx
+ ys[j] = ys[j] - miny
+ maxw = max( maxw, (maxws[j] + xs[j]) )
+ maxh = max( maxh, (maxhs[j] + ys[j]) )
+
+ # open the image file for output
+ ifile = open(imgfile,'w')
+ ifile.write('<?xml version="1.0" standalone="no"?>\n')
+ ifile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
+ ifile.write('<svg width="%dpx" height="%dpx" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (math.floor(maxw/10), math.floor(maxh/10), maxw, maxh))
+ ifile.write('<defs>\n')
+ for j in xrange(0,len(gdefs)):
+ ifile.write(gdefs[j])
+ ifile.write('</defs>\n')
+ for j in xrange(0,len(gids)):
+ ifile.write('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (gids[j], xs[j], ys[j]))
+ ifile.write('</svg>')
+ ifile.close()
+
+ return 0
+
# return tag at line pos in document
def lineinDoc(self, pos) :
if (pos >= 0) and (pos < self.docSize) :
- item = self.flatdoc[pos]
+ item = self.docList[pos]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
else :
@@ -65,6 +129,7 @@ class PParser(object):
argres = ''
return name, argres
+
# find tag in doc if within pos to end inclusive
def findinDoc(self, tagpath, pos, end) :
result = None
@@ -74,7 +139,7 @@ class PParser(object):
end = min(self.docSize, end)
foundat = -1
for j in xrange(pos, end):
- item = self.flatdoc[j]
+ item = self.docList[j]
if item.find('=') >= 0:
(name, argres) = item.split('=',1)
else :
@@ -86,6 +151,7 @@ class PParser(object):
break
return foundat, result
+
# return list of start positions for the tagpath
def posinDoc(self, tagpath):
startpos = []
@@ -98,152 +164,638 @@ class PParser(object):
pos = foundpos + 1
return startpos
- def getData(self, path):
- result = None
- cnt = len(self.flatdoc)
- for j in xrange(cnt):
- item = self.flatdoc[j]
- if item.find('=') >= 0:
- (name, argt) = item.split('=')
- argres = argt.split('|')
- else:
- name = item
- argres = []
- if (name.endswith(path)):
- result = argres
- break
- if (len(argres) > 0) :
- for j in xrange(0,len(argres)):
- argres[j] = int(argres[j])
- return result
- def getDataatPos(self, path, pos):
- result = None
- item = self.flatdoc[pos]
- if item.find('=') >= 0:
- (name, argt) = item.split('=')
- argres = argt.split('|')
- else:
- name = item
- argres = []
- if (len(argres) > 0) :
- for j in xrange(0,len(argres)):
- argres[j] = int(argres[j])
- if (name.endswith(path)):
- result = argres
- return result
-
- def getDataTemp(self, path):
- result = None
- cnt = len(self.temp)
- for j in xrange(cnt):
- item = self.temp[j]
- if item.find('=') >= 0:
- (name, argt) = item.split('=')
- argres = argt.split('|')
- else:
- name = item
- argres = []
- if (name.endswith(path)):
- result = argres
- self.temp.pop(j)
- break
- if (len(argres) > 0) :
- for j in xrange(0,len(argres)):
- argres[j] = int(argres[j])
- return result
+ # returns a vector of integers for the tagpath
+ def getData(self, tagpath, pos, end):
+ argres=[]
+ (foundat, argt) = self.findinDoc(tagpath, pos, end)
+ if (argt != None) and (len(argt) > 0) :
+ argList = argt.split('|')
+ argres = [ int(strval) for strval in argList]
+ return argres
+
+
+ # get the class
+ def getClass(self, pclass):
+ nclass = pclass
+
+ # class names are an issue given topaz may start them with numerals (not allowed),
+ # use a mix of cases (which cause some browsers problems), and actually
+ # attach numbers after "_reclustered*" to the end to deal classeses that inherit
+ # from a base class (but then not actually provide all of these _reclustereed
+ # classes in the stylesheet!
+
+ # so we clean this up by lowercasing, prepend 'cl-', and getting any baseclass
+ # that exists in the stylesheet first, and then adding this specific class
+ # after
+
+ # also some class names have spaces in them so need to convert to dashes
+ if nclass != None :
+ nclass = nclass.replace(' ','-')
+ classres = ''
+ nclass = nclass.lower()
+ nclass = 'cl-' + nclass
+ baseclass = ''
+ # graphic is the base class for captions
+ if nclass.find('cl-cap-') >=0 :
+ classres = 'graphic' + ' '
+ else :
+ # strip to find baseclass
+ p = nclass.find('_')
+ if p > 0 :
+ baseclass = nclass[0:p]
+ if baseclass in self.classList:
+ classres += baseclass + ' '
+ classres += nclass
+ nclass = classres
+ return nclass
+
+
+ # develop a sorted description of the starting positions of
+ # groups and regions on the page, as well as the page type
+ def PageDescription(self):
+
+ def compare(x, y):
+ (xtype, xval) = x
+ (ytype, yval) = y
+ if xval > yval:
+ return 1
+ if xval == yval:
+ return 0
+ return -1
- def getImages(self):
result = []
- self.temp = self.flatdoc
- while (self.getDataTemp('img') != None):
- h = self.getDataTemp('img.h')[0]
- w = self.getDataTemp('img.w')[0]
- x = self.getDataTemp('img.x')[0]
- y = self.getDataTemp('img.y')[0]
- src = self.getDataTemp('img.src')[0]
- result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
- return result
-
- def getGlyphs(self):
+ (pos, pagetype) = self.findinDoc('page.type',0,-1)
+
+ groupList = self.posinDoc('page.group')
+ groupregionList = self.posinDoc('page.group.region')
+ pageregionList = self.posinDoc('page.region')
+ # integrate into one list
+ for j in groupList:
+ result.append(('grpbeg',j))
+ for j in groupregionList:
+ result.append(('gregion',j))
+ for j in pageregionList:
+ result.append(('pregion',j))
+ result.sort(compare)
+
+ # insert group end and page end indicators
+ inGroup = False
+ j = 0
+ while True:
+ if j == len(result): break
+ rtype = result[j][0]
+ rval = result[j][1]
+ if not inGroup and (rtype == 'grpbeg') :
+ inGroup = True
+ j = j + 1
+ elif inGroup and (rtype in ('grpbeg', 'pregion')):
+ result.insert(j,('grpend',rval))
+ inGroup = False
+ else:
+ j = j + 1
+ if inGroup:
+ result.append(('grpend',-1))
+ result.append(('pageend', -1))
+ return pagetype, result
+
+
+
+ # build a description of the paragraph
+ def getParaDescription(self, start, end, regtype):
+
result = []
- if (self.gid != None) and (len(self.gid) > 0):
- glyphs = []
- for j in set(self.gid):
- glyphs.append(j)
- glyphs.sort()
- for gid in glyphs:
- id='id="gl%d"' % gid
- path = self.gd.lookup(id)
- if path:
- result.append(id + ' ' + path)
- return result
-
-
-def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi):
- mlst = []
- pp = PParser(gdict, flat_xml, meta_array)
- mlst.append('<?xml version="1.0" standalone="no"?>\n')
- if (raw):
- mlst.append('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
- mlst.append('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
- mlst.append('<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors']))
- else:
- mlst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
- mlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n')
- mlst.append('<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors']))
- mlst.append('<script><![CDATA[\n')
- mlst.append('function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n')
- mlst.append('var dpi=%d;\n' % scaledpi)
- if (previd) :
- mlst.append('var prevpage="page%04d.xhtml";\n' % (previd))
- if (nextid) :
- mlst.append('var nextpage="page%04d.xhtml";\n' % (nextid))
- mlst.append('var pw=%d;var ph=%d;' % (pp.pw, pp.ph))
- mlst.append('function zoomin(){dpi=dpi*(0.8);setsize();}\n')
- mlst.append('function zoomout(){dpi=dpi*1.25;setsize();}\n')
- mlst.append('function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n')
- mlst.append('function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n')
- mlst.append('function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n')
- mlst.append('var gt=gd();if(gt>0){dpi=gt;}\n')
- mlst.append('window.onload=setsize;\n')
- mlst.append(']]></script>\n')
- mlst.append('</head>\n')
- mlst.append('<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n')
- mlst.append('<div style="white-space:nowrap;">\n')
- if previd == None:
- mlst.append('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
+
+ # paragraph
+ (pos, pclass) = self.findinDoc('paragraph.class',start,end)
+
+ pclass = self.getClass(pclass)
+
+ # if paragraph uses extratokens (extra glyphs) then make it fixed
+ (pos, extraglyphs) = self.findinDoc('paragraph.extratokens',start,end)
+
+ # build up a description of the paragraph in result and return it
+ # first check for the basic - all words paragraph
+ (pos, sfirst) = self.findinDoc('paragraph.firstWord',start,end)
+ (pos, slast) = self.findinDoc('paragraph.lastWord',start,end)
+ if (sfirst != None) and (slast != None) :
+ first = int(sfirst)
+ last = int(slast)
+
+ makeImage = (regtype == 'vertical') or (regtype == 'table')
+ makeImage = makeImage or (extraglyphs != None)
+ if self.fixedimage:
+ makeImage = makeImage or (regtype == 'fixed')
+
+ if (pclass != None):
+ makeImage = makeImage or (pclass.find('.inverted') >= 0)
+ if self.fixedimage :
+ makeImage = makeImage or (pclass.find('cl-f-') >= 0)
+
+ # before creating an image make sure glyph info exists
+ gidList = self.getData('info.glyph.glyphID',0,-1)
+
+ makeImage = makeImage & (len(gidList) > 0)
+
+ if not makeImage :
+ # standard all word paragraph
+ for wordnum in xrange(first, last):
+ result.append(('ocr', wordnum))
+ return pclass, result
+
+ # convert paragraph to svg image
+ # translate first and last word into first and last glyphs
+ # and generate inline image and include it
+ glyphList = []
+ firstglyphList = self.getData('word.firstGlyph',0,-1)
+ gidList = self.getData('info.glyph.glyphID',0,-1)
+ firstGlyph = firstglyphList[first]
+ if last < len(firstglyphList):
+ lastGlyph = firstglyphList[last]
+ else :
+ lastGlyph = len(gidList)
+
+ # handle case of white sapce paragraphs with no actual glyphs in them
+ # by reverting to text based paragraph
+ if firstGlyph >= lastGlyph:
+ # revert to standard text based paragraph
+ for wordnum in xrange(first, last):
+ result.append(('ocr', wordnum))
+ return pclass, result
+
+ for glyphnum in xrange(firstGlyph, lastGlyph):
+ glyphList.append(glyphnum)
+ # include any extratokens if they exist
+ (pos, sfg) = self.findinDoc('extratokens.firstGlyph',start,end)
+ (pos, slg) = self.findinDoc('extratokens.lastGlyph',start,end)
+ if (sfg != None) and (slg != None):
+ for glyphnum in xrange(int(sfg), int(slg)):
+ glyphList.append(glyphnum)
+ num = self.svgcount
+ self.glyphs_to_image(glyphList)
+ self.svgcount += 1
+ result.append(('svg', num))
+ return pclass, result
+
+ # this type of paragraph may be made up of multiple spans, inline
+ # word monograms (images), and words with semantic meaning,
+ # plus glyphs used to form starting letter of first word
+
+ # need to parse this type line by line
+ line = start + 1
+ word_class = ''
+
+ # if end is -1 then we must search to end of document
+ if end == -1 :
+ end = self.docSize
+
+ # seems some xml has last* coming before first* so we have to
+ # handle any order
+ sp_first = -1
+ sp_last = -1
+
+ gl_first = -1
+ gl_last = -1
+
+ ws_first = -1
+ ws_last = -1
+
+ word_class = ''
+
+ word_semantic_type = ''
+
+ while (line < end) :
+
+ (name, argres) = self.lineinDoc(line)
+
+ if name.endswith('span.firstWord') :
+ sp_first = int(argres)
+
+ elif name.endswith('span.lastWord') :
+ sp_last = int(argres)
+
+ elif name.endswith('word.firstGlyph') :
+ gl_first = int(argres)
+
+ elif name.endswith('word.lastGlyph') :
+ gl_last = int(argres)
+
+ elif name.endswith('word_semantic.firstWord'):
+ ws_first = int(argres)
+
+ elif name.endswith('word_semantic.lastWord'):
+ ws_last = int(argres)
+
+ elif name.endswith('word.class'):
+ # we only handle spaceafter word class
+ try:
+ (cname, space) = argres.split('-',1)
+ if space == '' : space = '0'
+ if (cname == 'spaceafter') and (int(space) > 0) :
+ word_class = 'sa'
+ except:
+ pass
+
+ elif name.endswith('word.img.src'):
+ result.append(('img' + word_class, int(argres)))
+ word_class = ''
+
+ elif name.endswith('region.img.src'):
+ result.append(('img' + word_class, int(argres)))
+
+ if (sp_first != -1) and (sp_last != -1):
+ for wordnum in xrange(sp_first, sp_last):
+ result.append(('ocr', wordnum))
+ sp_first = -1
+ sp_last = -1
+
+ if (gl_first != -1) and (gl_last != -1):
+ glyphList = []
+ for glyphnum in xrange(gl_first, gl_last):
+ glyphList.append(glyphnum)
+ num = self.svgcount
+ self.glyphs_to_image(glyphList)
+ self.svgcount += 1
+ result.append(('svg', num))
+ gl_first = -1
+ gl_last = -1
+
+ if (ws_first != -1) and (ws_last != -1):
+ for wordnum in xrange(ws_first, ws_last):
+ result.append(('ocr', wordnum))
+ ws_first = -1
+ ws_last = -1
+
+ line += 1
+
+ return pclass, result
+
+
+ def buildParagraph(self, pclass, pdesc, type, regtype) :
+ parares = ''
+ sep =''
+
+ classres = ''
+ if pclass :
+ classres = ' class="' + pclass + '"'
+
+ br_lb = (regtype == 'fixed') or (regtype == 'chapterheading') or (regtype == 'vertical')
+
+ handle_links = len(self.link_id) > 0
+
+ if (type == 'full') or (type == 'begin') :
+ parares += '<p' + classres + '>'
+
+ if (type == 'end'):
+ parares += ' '
+
+ lstart = len(parares)
+
+ cnt = len(pdesc)
+
+ for j in xrange( 0, cnt) :
+
+ (wtype, num) = pdesc[j]
+
+ if wtype == 'ocr' :
+ try:
+ word = self.ocrtext[num]
+ except:
+ word = ""
+
+ sep = ' '
+
+ if handle_links:
+ link = self.link_id[num]
+ if (link > 0):
+ linktype = self.link_type[link-1]
+ title = self.link_title[link-1]
+ if (title == "") or (parares.rfind(title) < 0):
+ title=parares[lstart:]
+ if linktype == 'external' :
+ linkhref = self.link_href[link-1]
+ linkhtml = '<a href="%s">' % linkhref
+ else :
+ if len(self.link_page) >= link :
+ ptarget = self.link_page[link-1] - 1
+ linkhtml = '<a href="#page%04d">' % ptarget
+ else :
+ # just link to the current page
+ linkhtml = '<a href="#' + self.id + '">'
+ linkhtml += title + '</a>'
+ pos = parares.rfind(title)
+ if pos >= 0:
+ parares = parares[0:pos] + linkhtml + parares[pos+len(title):]
+ else :
+ parares += linkhtml
+ lstart = len(parares)
+ if word == '_link_' : word = ''
+ elif (link < 0) :
+ if word == '_link_' : word = ''
+
+ if word == '_lb_':
+ if ((num-1) in self.dehyphen_rootid ) or handle_links:
+ word = ''
+ sep = ''
+ elif br_lb :
+ word = '<br />\n'
+ sep = ''
+ else :
+ word = '\n'
+ sep = ''
+
+ if num in self.dehyphen_rootid :
+ word = word[0:-1]
+ sep = ''
+
+ parares += word + sep
+
+ elif wtype == 'img' :
+ sep = ''
+ parares += '<img src="img/img%04d.jpg" alt="" />' % num
+ parares += sep
+
+ elif wtype == 'imgsa' :
+ sep = ' '
+ parares += '<img src="img/img%04d.jpg" alt="" />' % num
+ parares += sep
+
+ elif wtype == 'svg' :
+ sep = ''
+ parares += '<img src="img/' + self.id + '_%04d.svg" alt="" />' % num
+ parares += sep
+
+ if len(sep) > 0 : parares = parares[0:-1]
+ if (type == 'full') or (type == 'end') :
+ parares += '</p>'
+ return parares
+
+
+ def buildTOCEntry(self, pdesc) :
+ parares = ''
+ sep =''
+ tocentry = ''
+ handle_links = len(self.link_id) > 0
+
+ lstart = 0
+
+ cnt = len(pdesc)
+ for j in xrange( 0, cnt) :
+
+ (wtype, num) = pdesc[j]
+
+ if wtype == 'ocr' :
+ word = self.ocrtext[num]
+ sep = ' '
+
+ if handle_links:
+ link = self.link_id[num]
+ if (link > 0):
+ linktype = self.link_type[link-1]
+ title = self.link_title[link-1]
+ title = title.rstrip('. ')
+ alt_title = parares[lstart:]
+ alt_title = alt_title.strip()
+ # now strip off the actual printed page number
+ alt_title = alt_title.rstrip('01234567890ivxldIVXLD-.')
+ alt_title = alt_title.rstrip('. ')
+ # skip over any external links - can't have them in a books toc
+ if linktype == 'external' :
+ title = ''
+ alt_title = ''
+ linkpage = ''
+ else :
+ if len(self.link_page) >= link :
+ ptarget = self.link_page[link-1] - 1
+ linkpage = '%04d' % ptarget
+ else :
+ # just link to the current page
+ linkpage = self.id[4:]
+ if len(alt_title) >= len(title):
+ title = alt_title
+ if title != '' and linkpage != '':
+ tocentry += title + '|' + linkpage + '\n'
+ lstart = len(parares)
+ if word == '_link_' : word = ''
+ elif (link < 0) :
+ if word == '_link_' : word = ''
+
+ if word == '_lb_':
+ word = ''
+ sep = ''
+
+ if num in self.dehyphen_rootid :
+ word = word[0:-1]
+ sep = ''
+
+ parares += word + sep
+
+ else :
+ continue
+
+ return tocentry
+
+
+
+
+ # walk the document tree collecting the information needed
+ # to build an html page using the ocrText
+
+ def process(self):
+
+ tocinfo = ''
+ hlst = []
+
+ # get the ocr text
+ (pos, argres) = self.findinDoc('info.word.ocrText',0,-1)
+ if argres : self.ocrtext = argres.split('|')
+
+ # get information to dehyphenate the text
+ self.dehyphen_rootid = self.getData('info.dehyphen.rootID',0,-1)
+
+ # determine if first paragraph is continued from previous page
+ (pos, self.parastems_stemid) = self.findinDoc('info.paraStems.stemID',0,-1)
+ first_para_continued = (self.parastems_stemid != None)
+
+ # determine if last paragraph is continued onto the next page
+ (pos, self.paracont_stemid) = self.findinDoc('info.paraCont.stemID',0,-1)
+ last_para_continued = (self.paracont_stemid != None)
+
+ # collect link ids
+ self.link_id = self.getData('info.word.link_id',0,-1)
+
+ # collect link destination page numbers
+ self.link_page = self.getData('info.links.page',0,-1)
+
+ # collect link types (container versus external)
+ (pos, argres) = self.findinDoc('info.links.type',0,-1)
+ if argres : self.link_type = argres.split('|')
+
+ # collect link destinations
+ (pos, argres) = self.findinDoc('info.links.href',0,-1)
+ if argres : self.link_href = argres.split('|')
+
+ # collect link titles
+ (pos, argres) = self.findinDoc('info.links.title',0,-1)
+ if argres :
+ self.link_title = argres.split('|')
else:
- mlst.append('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n')
+ self.link_title.append('')
- mlst.append('<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph))
- if (pp.gid != None):
- mlst.append('<defs>\n')
- gdefs = pp.getGlyphs()
- for j in xrange(0,len(gdefs)):
- mlst.append(gdefs[j])
- mlst.append('</defs>\n')
- img = pp.getImages()
- if (img != None):
- for j in xrange(0,len(img)):
- mlst.append(img[j])
- if (pp.gid != None):
- for j in xrange(0,len(pp.gid)):
- mlst.append('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
- if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
- xpos = "%d" % (pp.pw // 3)
- ypos = "%d" % (pp.ph // 3)
- mlst.append('<text x="' + xpos + '" y="' + ypos + '" font-size="' + meta_array['fontSize'] + '" font-family="Helvetica" stroke="black">This page intentionally left blank.</text>\n')
- if (raw) :
- mlst.append('</svg>')
- else :
- mlst.append('</svg></a>\n')
- if nextid == None:
- mlst.append('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
- else :
- mlst.append('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n')
- mlst.append('</div>\n')
- mlst.append('<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n')
- mlst.append('</body>\n')
- mlst.append('</html>\n')
- return "".join(mlst)
+ # get a descriptions of the starting points of the regions
+ # and groups on the page
+ (pagetype, pageDesc) = self.PageDescription()
+ regcnt = len(pageDesc) - 1
+
+ anchorSet = False
+ breakSet = False
+ inGroup = False
+
+ # process each region on the page and convert what you can to html
+
+ for j in xrange(regcnt):
+
+ (etype, start) = pageDesc[j]
+ (ntype, end) = pageDesc[j+1]
+
+
+ # set anchor for link target on this page
+ if not anchorSet and not first_para_continued:
+ hlst.append('<div style="visibility: hidden; height: 0; width: 0;" id="')
+ hlst.append(self.id + '" title="pagetype_' + pagetype + '"></div>\n')
+ anchorSet = True
+
+ # handle groups of graphics with text captions
+ if (etype == 'grpbeg'):
+ (pos, grptype) = self.findinDoc('group.type', start, end)
+ if grptype != None:
+ if grptype == 'graphic':
+ gcstr = ' class="' + grptype + '"'
+ hlst.append('<div' + gcstr + '>')
+ inGroup = True
+
+ elif (etype == 'grpend'):
+ if inGroup:
+ hlst.append('</div>\n')
+ inGroup = False
+
+ else:
+ (pos, regtype) = self.findinDoc('region.type',start,end)
+
+ if regtype == 'graphic' :
+ (pos, simgsrc) = self.findinDoc('img.src',start,end)
+ if simgsrc:
+ if inGroup:
+ hlst.append('<img src="img/img%04d.jpg" alt="" />' % int(simgsrc))
+ else:
+ hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
+
+ elif regtype == 'chapterheading' :
+ (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+ if not breakSet:
+ hlst.append('<div style="page-break-after: always;">&nbsp;</div>\n')
+ breakSet = True
+ tag = 'h1'
+ if pclass and (len(pclass) >= 7):
+ if pclass[3:7] == 'ch1-' : tag = 'h1'
+ if pclass[3:7] == 'ch2-' : tag = 'h2'
+ if pclass[3:7] == 'ch3-' : tag = 'h3'
+ hlst.append('<' + tag + ' class="' + pclass + '">')
+ else:
+ hlst.append('<' + tag + '>')
+ hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
+ hlst.append('</' + tag + '>')
+
+ elif (regtype == 'text') or (regtype == 'fixed') or (regtype == 'insert') or (regtype == 'listitem'):
+ ptype = 'full'
+ # check to see if this is a continution from the previous page
+ if first_para_continued :
+ ptype = 'end'
+ first_para_continued = False
+ (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+ if pclass and (len(pclass) >= 6) and (ptype == 'full'):
+ tag = 'p'
+ if pclass[3:6] == 'h1-' : tag = 'h4'
+ if pclass[3:6] == 'h2-' : tag = 'h5'
+ if pclass[3:6] == 'h3-' : tag = 'h6'
+ hlst.append('<' + tag + ' class="' + pclass + '">')
+ hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
+ hlst.append('</' + tag + '>')
+ else :
+ hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
+
+ elif (regtype == 'tocentry') :
+ ptype = 'full'
+ if first_para_continued :
+ ptype = 'end'
+ first_para_continued = False
+ (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+ tocinfo += self.buildTOCEntry(pdesc)
+ hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
+
+ elif (regtype == 'vertical') or (regtype == 'table') :
+ ptype = 'full'
+ if inGroup:
+ ptype = 'middle'
+ if first_para_continued :
+ ptype = 'end'
+ first_para_continued = False
+ (pclass, pdesc) = self.getParaDescription(start, end, regtype)
+ hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
+
+
+ elif (regtype == 'synth_fcvr.center'):
+ (pos, simgsrc) = self.findinDoc('img.src',start,end)
+ if simgsrc:
+ hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
+
+ else :
+ print ' Making region type', regtype,
+ (pos, temp) = self.findinDoc('paragraph',start,end)
+ (pos2, temp) = self.findinDoc('span',start,end)
+ if pos != -1 or pos2 != -1:
+ print ' a "text" region'
+ orig_regtype = regtype
+ regtype = 'fixed'
+ ptype = 'full'
+ # check to see if this is a continution from the previous page
+ if first_para_continued :
+ ptype = 'end'
+ first_para_continued = False
+ (pclass, pdesc) = self.getParaDescription(start,end, regtype)
+ if not pclass:
+ if orig_regtype.endswith('.right') : pclass = 'cl-right'
+ elif orig_regtype.endswith('.center') : pclass = 'cl-center'
+ elif orig_regtype.endswith('.left') : pclass = 'cl-left'
+ elif orig_regtype.endswith('.justify') : pclass = 'cl-justify'
+ if pclass and (ptype == 'full') and (len(pclass) >= 6):
+ tag = 'p'
+ if pclass[3:6] == 'h1-' : tag = 'h4'
+ if pclass[3:6] == 'h2-' : tag = 'h5'
+ if pclass[3:6] == 'h3-' : tag = 'h6'
+ hlst.append('<' + tag + ' class="' + pclass + '">')
+ hlst.append(self.buildParagraph(pclass, pdesc, 'middle', regtype))
+ hlst.append('</' + tag + '>')
+ else :
+ hlst.append(self.buildParagraph(pclass, pdesc, ptype, regtype))
+ else :
+ print ' a "graphic" region'
+ (pos, simgsrc) = self.findinDoc('img.src',start,end)
+ if simgsrc:
+ hlst.append('<div class="graphic"><img src="img/img%04d.jpg" alt="" /></div>' % int(simgsrc))
+
+
+ htmlpage = "".join(hlst)
+ if last_para_continued :
+ if htmlpage[-4:] == '</p>':
+ htmlpage = htmlpage[0:-4]
+ last_para_continued = False
+
+ return htmlpage, tocinfo
+
+
+def convert2HTML(flatxml, classlst, fileid, bookDir, gdict, fixedimage):
+ # create a document parser
+ dp = DocParser(flatxml, classlst, fileid, bookDir, gdict, fixedimage)
+ htmlpage, tocinfo = dp.process()
+ return htmlpage, tocinfo
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2svg.py b/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2svg.py
index 3ed925d..4dfd6c7 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2svg.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/flatxml2svg.py
@@ -1,148 +1,82 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-class Unbuffered:
- def __init__(self, stream):
- self.stream = stream
- def write(self, data):
- self.stream.write(data)
- self.stream.flush()
- def __getattr__(self, attr):
- return getattr(self.stream, attr)
-
import sys
-sys.stdout=Unbuffered(sys.stdout)
-
import csv
import os
import getopt
from struct import pack
from struct import unpack
-class TpzDRMError(Exception):
- pass
-
-# local support routines
-if 'calibre' in sys.modules:
- inCalibre = True
-else:
- inCalibre = False
-
-if inCalibre :
- from calibre_plugins.dedrm import convert2xml
- from calibre_plugins.dedrm import flatxml2html
- from calibre_plugins.dedrm import flatxml2svg
- from calibre_plugins.dedrm import stylexml2css
-else :
- import convert2xml
- import flatxml2html
- import flatxml2svg
- import stylexml2css
-
-# global switch
-buildXML = False
-
-# Get a 7 bit encoded number from a file
-def readEncodedNumber(file):
- flag = False
- c = file.read(1)
- if (len(c) == 0):
- return None
- data = ord(c)
- if data == 0xFF:
- flag = True
- c = file.read(1)
- if (len(c) == 0):
- return None
- data = ord(c)
- if data >= 0x80:
- datax = (data & 0x7F)
- while data >= 0x80 :
- c = file.read(1)
- if (len(c) == 0):
- return None
- data = ord(c)
- datax = (datax <<7) + (data & 0x7F)
- data = datax
- if flag:
- data = -data
- return data
-
-# Get a length prefixed string from the file
-def lengthPrefixString(data):
- return encodeNumber(len(data))+data
-
-def readString(file):
- stringLength = readEncodedNumber(file)
- if (stringLength == None):
- return None
- sv = file.read(stringLength)
- if (len(sv) != stringLength):
- return ""
- return unpack(str(stringLength)+"s",sv)[0]
-
-def getMetaArray(metaFile):
- # parse the meta file
- result = {}
- fo = file(metaFile,'rb')
- size = readEncodedNumber(fo)
- for i in xrange(size):
- tag = readString(fo)
- value = readString(fo)
- result[tag] = value
- # print tag, value
- fo.close()
- return result
-
-
-# dictionary of all text strings by index value
-class Dictionary(object):
- def __init__(self, dictFile):
- self.filename = dictFile
- self.size = 0
- self.fo = file(dictFile,'rb')
- self.stable = []
- self.size = readEncodedNumber(self.fo)
- for i in xrange(self.size):
- self.stable.append(self.escapestr(readString(self.fo)))
- self.pos = 0
- def escapestr(self, str):
- str = str.replace('&','&amp;')
- str = str.replace('<','&lt;')
- str = str.replace('>','&gt;')
- str = str.replace('=','&#61;')
- return str
- def lookup(self,val):
- if ((val >= 0) and (val < self.size)) :
- self.pos = val
- return self.stable[self.pos]
- else:
- print "Error: %d outside of string table limits" % val
- raise TpzDRMError('outside or string table limits')
- # sys.exit(-1)
- def getSize(self):
- return self.size
- def getPos(self):
- return self.pos
-
-class PageDimParser(object):
- def __init__(self, flatxml):
+class PParser(object):
+ def __init__(self, gd, flatxml, meta_array):
+ self.gd = gd
self.flatdoc = flatxml.split('\n')
- # find tag if within pos to end inclusive
+ self.docSize = len(self.flatdoc)
+ self.temp = []
+
+ self.ph = -1
+ self.pw = -1
+ startpos = self.posinDoc('page.h') or self.posinDoc('book.h')
+ for p in startpos:
+ (name, argres) = self.lineinDoc(p)
+ self.ph = max(self.ph, int(argres))
+ startpos = self.posinDoc('page.w') or self.posinDoc('book.w')
+ for p in startpos:
+ (name, argres) = self.lineinDoc(p)
+ self.pw = max(self.pw, int(argres))
+
+ if self.ph <= 0:
+ self.ph = int(meta_array.get('pageHeight', '11000'))
+ if self.pw <= 0:
+ self.pw = int(meta_array.get('pageWidth', '8500'))
+
+ res = []
+ startpos = self.posinDoc('info.glyph.x')
+ for p in startpos:
+ argres = self.getDataatPos('info.glyph.x', p)
+ res.extend(argres)
+ self.gx = res
+
+ res = []
+ startpos = self.posinDoc('info.glyph.y')
+ for p in startpos:
+ argres = self.getDataatPos('info.glyph.y', p)
+ res.extend(argres)
+ self.gy = res
+
+ res = []
+ startpos = self.posinDoc('info.glyph.glyphID')
+ for p in startpos:
+ argres = self.getDataatPos('info.glyph.glyphID', p)
+ res.extend(argres)
+ self.gid = res
+
+
+ # return tag at line pos in document
+ def lineinDoc(self, pos) :
+ if (pos >= 0) and (pos < self.docSize) :
+ item = self.flatdoc[pos]
+ if item.find('=') >= 0:
+ (name, argres) = item.split('=',1)
+ else :
+ name = item
+ argres = ''
+ return name, argres
+
+ # find tag in doc if within pos to end inclusive
def findinDoc(self, tagpath, pos, end) :
result = None
- docList = self.flatdoc
- cnt = len(docList)
if end == -1 :
- end = cnt
+ end = self.docSize
else:
- end = min(cnt,end)
+ end = min(self.docSize, end)
foundat = -1
for j in xrange(pos, end):
- item = docList[j]
+ item = self.flatdoc[j]
if item.find('=') >= 0:
- (name, argres) = item.split('=')
+ (name, argres) = item.split('=',1)
else :
name = item
argres = ''
@@ -151,44 +85,19 @@ class PageDimParser(object):
foundat = j
break
return foundat, result
- def process(self):
- (pos, sph) = self.findinDoc('page.h',0,-1)
- (pos, spw) = self.findinDoc('page.w',0,-1)
- if (sph == None): sph = '-1'
- if (spw == None): spw = '-1'
- return sph, spw
-def getPageDim(flatxml):
- # create a document parser
- dp = PageDimParser(flatxml)
- (ph, pw) = dp.process()
- return ph, pw
+ # return list of start positions for the tagpath
+ def posinDoc(self, tagpath):
+ startpos = []
+ pos = 0
+ res = ""
+ while res != None :
+ (foundpos, res) = self.findinDoc(tagpath, pos, -1)
+ if res != None :
+ startpos.append(foundpos)
+ pos = foundpos + 1
+ return startpos
-class GParser(object):
- def __init__(self, flatxml):
- self.flatdoc = flatxml.split('\n')
- self.dpi = 1440
- self.gh = self.getData('info.glyph.h')
- self.gw = self.getData('info.glyph.w')
- self.guse = self.getData('info.glyph.use')
- if self.guse :
- self.count = len(self.guse)
- else :
- self.count = 0
- self.gvtx = self.getData('info.glyph.vtx')
- self.glen = self.getData('info.glyph.len')
- self.gdpi = self.getData('info.glyph.dpi')
- self.vx = self.getData('info.vtx.x')
- self.vy = self.getData('info.vtx.y')
- self.vlen = self.getData('info.len.n')
- if self.vlen :
- self.glen.append(len(self.vlen))
- elif self.glen:
- self.glen.append(0)
- if self.vx :
- self.gvtx.append(len(self.vx))
- elif self.gvtx :
- self.gvtx.append(0)
def getData(self, path):
result = None
cnt = len(self.flatdoc)
@@ -200,522 +109,141 @@ class GParser(object):
else:
name = item
argres = []
- if (name == path):
+ if (name.endswith(path)):
result = argres
break
if (len(argres) > 0) :
for j in xrange(0,len(argres)):
argres[j] = int(argres[j])
return result
- def getGlyphDim(self, gly):
- if self.gdpi[gly] == 0:
- return 0, 0
- maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly]
- maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly]
- return maxh, maxw
- def getPath(self, gly):
- path = ''
- if (gly < 0) or (gly >= self.count):
- return path
- tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
- ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
- p = 0
- for k in xrange(self.glen[gly], self.glen[gly+1]):
- if (p == 0):
- zx = tx[0:self.vlen[k]+1]
- zy = ty[0:self.vlen[k]+1]
- else:
- zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
- zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
- p += 1
- j = 0
- while ( j < len(zx) ):
- if (j == 0):
- # Start Position.
- path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
- elif (j <= len(zx)-3):
- # Cubic Bezier Curve
- path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly])
- j += 2
- elif (j == len(zx)-2):
- # Cubic Bezier Curve to Start Position
- path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
- j += 1
- elif (j == len(zx)-1):
- # Quadratic Bezier Curve to Start Position
- path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
-
- j += 1
- path += 'z'
- return path
-
-
-
-# dictionary of all text strings by index value
-class GlyphDict(object):
- def __init__(self):
- self.gdict = {}
- def lookup(self, id):
- # id='id="gl%d"' % val
- if id in self.gdict:
- return self.gdict[id]
- return None
- def addGlyph(self, val, path):
- id='id="gl%d"' % val
- self.gdict[id] = path
-
-def generateBook(bookDir, raw, fixedimage):
- # sanity check Topaz file extraction
- if not os.path.exists(bookDir) :
- print "Can not find directory with unencrypted book"
- return 1
-
- dictFile = os.path.join(bookDir,'dict0000.dat')
- if not os.path.exists(dictFile) :
- print "Can not find dict0000.dat file"
- return 1
-
- pageDir = os.path.join(bookDir,'page')
- if not os.path.exists(pageDir) :
- print "Can not find page directory in unencrypted book"
- return 1
-
- imgDir = os.path.join(bookDir,'img')
- if not os.path.exists(imgDir) :
- print "Can not find image directory in unencrypted book"
- return 1
-
- glyphsDir = os.path.join(bookDir,'glyphs')
- if not os.path.exists(glyphsDir) :
- print "Can not find glyphs directory in unencrypted book"
- return 1
-
- metaFile = os.path.join(bookDir,'metadata0000.dat')
- if not os.path.exists(metaFile) :
- print "Can not find metadata0000.dat in unencrypted book"
- return 1
-
- svgDir = os.path.join(bookDir,'svg')
- if not os.path.exists(svgDir) :
- os.makedirs(svgDir)
-
- if buildXML:
- xmlDir = os.path.join(bookDir,'xml')
- if not os.path.exists(xmlDir) :
- os.makedirs(xmlDir)
-
- otherFile = os.path.join(bookDir,'other0000.dat')
- if not os.path.exists(otherFile) :
- print "Can not find other0000.dat in unencrypted book"
- return 1
-
- print "Updating to color images if available"
- spath = os.path.join(bookDir,'color_img')
- dpath = os.path.join(bookDir,'img')
- filenames = os.listdir(spath)
- filenames = sorted(filenames)
- for filename in filenames:
- imgname = filename.replace('color','img')
- sfile = os.path.join(spath,filename)
- dfile = os.path.join(dpath,imgname)
- imgdata = file(sfile,'rb').read()
- file(dfile,'wb').write(imgdata)
-
- print "Creating cover.jpg"
- isCover = False
- cpath = os.path.join(bookDir,'img')
- cpath = os.path.join(cpath,'img0000.jpg')
- if os.path.isfile(cpath):
- cover = file(cpath, 'rb').read()
- cpath = os.path.join(bookDir,'cover.jpg')
- file(cpath, 'wb').write(cover)
- isCover = True
-
-
- print 'Processing Dictionary'
- dict = Dictionary(dictFile)
-
- print 'Processing Meta Data and creating OPF'
- meta_array = getMetaArray(metaFile)
-
- # replace special chars in title and authors like & < >
- title = meta_array.get('Title','No Title Provided')
- title = title.replace('&','&amp;')
- title = title.replace('<','&lt;')
- title = title.replace('>','&gt;')
- meta_array['Title'] = title
- authors = meta_array.get('Authors','No Authors Provided')
- authors = authors.replace('&','&amp;')
- authors = authors.replace('<','&lt;')
- authors = authors.replace('>','&gt;')
- meta_array['Authors'] = authors
-
- if buildXML:
- xname = os.path.join(xmlDir, 'metadata.xml')
- mlst = []
- for key in meta_array:
- mlst.append('<meta name="' + key + '" content="' + meta_array[key] + '" />\n')
- metastr = "".join(mlst)
- mlst = None
- file(xname, 'wb').write(metastr)
-
- print 'Processing StyleSheet'
-
- # get some scaling info from metadata to use while processing styles
- # and first page info
-
- fontsize = '135'
- if 'fontSize' in meta_array:
- fontsize = meta_array['fontSize']
-
- # also get the size of a normal text page
- # get the total number of pages unpacked as a safety check
- filenames = os.listdir(pageDir)
- numfiles = len(filenames)
-
- spage = '1'
- if 'firstTextPage' in meta_array:
- spage = meta_array['firstTextPage']
- pnum = int(spage)
- if pnum >= numfiles or pnum < 0:
- # metadata is wrong so just select a page near the front
- # 10% of the book to get a normal text page
- pnum = int(0.10 * numfiles)
- # print "first normal text page is", spage
-
- # get page height and width from first text page for use in stylesheet scaling
- pname = 'page%04d.dat' % (pnum + 1)
- fname = os.path.join(pageDir,pname)
- flat_xml = convert2xml.fromData(dict, fname)
-
- (ph, pw) = getPageDim(flat_xml)
- if (ph == '-1') or (ph == '0') : ph = '11000'
- if (pw == '-1') or (pw == '0') : pw = '8500'
- meta_array['pageHeight'] = ph
- meta_array['pageWidth'] = pw
- if 'fontSize' not in meta_array.keys():
- meta_array['fontSize'] = fontsize
-
- # process other.dat for css info and for map of page files to svg images
- # this map is needed because some pages actually are made up of multiple
- # pageXXXX.xml files
- xname = os.path.join(bookDir, 'style.css')
- flat_xml = convert2xml.fromData(dict, otherFile)
-
- # extract info.original.pid to get original page information
- pageIDMap = {}
- pageidnums = stylexml2css.getpageIDMap(flat_xml)
- if len(pageidnums) == 0:
- filenames = os.listdir(pageDir)
- numfiles = len(filenames)
- for k in range(numfiles):
- pageidnums.append(k)
- # create a map from page ids to list of page file nums to process for that page
- for i in range(len(pageidnums)):
- id = pageidnums[i]
- if id in pageIDMap.keys():
- pageIDMap[id].append(i)
+ def getDataatPos(self, path, pos):
+ result = None
+ item = self.flatdoc[pos]
+ if item.find('=') >= 0:
+ (name, argt) = item.split('=')
+ argres = argt.split('|')
else:
- pageIDMap[id] = [i]
-
- # now get the css info
- cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
- file(xname, 'wb').write(cssstr)
- if buildXML:
- xname = os.path.join(xmlDir, 'other0000.xml')
- file(xname, 'wb').write(convert2xml.getXML(dict, otherFile))
-
- print 'Processing Glyphs'
- gd = GlyphDict()
- filenames = os.listdir(glyphsDir)
- filenames = sorted(filenames)
- glyfname = os.path.join(svgDir,'glyphs.svg')
- glyfile = open(glyfname, 'w')
- glyfile.write('<?xml version="1.0" standalone="no"?>\n')
- glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
- glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
- glyfile.write('<title>Glyphs for %s</title>\n' % meta_array['Title'])
- glyfile.write('<defs>\n')
- counter = 0
- for filename in filenames:
- # print ' ', filename
- print '.',
- fname = os.path.join(glyphsDir,filename)
- flat_xml = convert2xml.fromData(dict, fname)
-
- if buildXML:
- xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
- file(xname, 'wb').write(convert2xml.getXML(dict, fname))
-
- gp = GParser(flat_xml)
- for i in xrange(0, gp.count):
- path = gp.getPath(i)
- maxh, maxw = gp.getGlyphDim(i)
- fullpath = '<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (counter * 256 + i, path, maxw, maxh)
- glyfile.write(fullpath)
- gd.addGlyph(counter * 256 + i, fullpath)
- counter += 1
- glyfile.write('</defs>\n')
- glyfile.write('</svg>\n')
- glyfile.close()
- print " "
-
-
- # start up the html
- # also build up tocentries while processing html
- htmlFileName = "book.html"
- hlst = []
- hlst.append('<?xml version="1.0" encoding="utf-8"?>\n')
- hlst.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n')
- hlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n')
- hlst.append('<head>\n')
- hlst.append('<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n')
- hlst.append('<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n')
- hlst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
- hlst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
- if 'ASIN' in meta_array:
- hlst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
- if 'GUID' in meta_array:
- hlst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
- hlst.append('<link href="style.css" rel="stylesheet" type="text/css" />\n')
- hlst.append('</head>\n<body>\n')
-
- print 'Processing Pages'
- # Books are at 1440 DPI. This is rendering at twice that size for
- # readability when rendering to the screen.
- scaledpi = 1440.0
-
- filenames = os.listdir(pageDir)
- filenames = sorted(filenames)
- numfiles = len(filenames)
-
- xmllst = []
- elst = []
-
- for filename in filenames:
- # print ' ', filename
- print ".",
- fname = os.path.join(pageDir,filename)
- flat_xml = convert2xml.fromData(dict, fname)
-
- # keep flat_xml for later svg processing
- xmllst.append(flat_xml)
-
- if buildXML:
- xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
- file(xname, 'wb').write(convert2xml.getXML(dict, fname))
+ name = item
+ argres = []
+ if (len(argres) > 0) :
+ for j in xrange(0,len(argres)):
+ argres[j] = int(argres[j])
+ if (name.endswith(path)):
+ result = argres
+ return result
- # first get the html
- pagehtml, tocinfo = flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage)
- elst.append(tocinfo)
- hlst.append(pagehtml)
+ def getDataTemp(self, path):
+ result = None
+ cnt = len(self.temp)
+ for j in xrange(cnt):
+ item = self.temp[j]
+ if item.find('=') >= 0:
+ (name, argt) = item.split('=')
+ argres = argt.split('|')
+ else:
+ name = item
+ argres = []
+ if (name.endswith(path)):
+ result = argres
+ self.temp.pop(j)
+ break
+ if (len(argres) > 0) :
+ for j in xrange(0,len(argres)):
+ argres[j] = int(argres[j])
+ return result
- # finish up the html string and output it
- hlst.append('</body>\n</html>\n')
- htmlstr = "".join(hlst)
- hlst = None
- file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr)
+ def getImages(self):
+ result = []
+ self.temp = self.flatdoc
+ while (self.getDataTemp('img') != None):
+ h = self.getDataTemp('img.h')[0]
+ w = self.getDataTemp('img.w')[0]
+ x = self.getDataTemp('img.x')[0]
+ y = self.getDataTemp('img.y')[0]
+ src = self.getDataTemp('img.src')[0]
+ result.append('<image xlink:href="../img/img%04d.jpg" x="%d" y="%d" width="%d" height="%d" />\n' % (src, x, y, w, h))
+ return result
- print " "
- print 'Extracting Table of Contents from Amazon OCR'
+ def getGlyphs(self):
+ result = []
+ if (self.gid != None) and (len(self.gid) > 0):
+ glyphs = []
+ for j in set(self.gid):
+ glyphs.append(j)
+ glyphs.sort()
+ for gid in glyphs:
+ id='id="gl%d"' % gid
+ path = self.gd.lookup(id)
+ if path:
+ result.append(id + ' ' + path)
+ return result
- # first create a table of contents file for the svg images
- tlst = []
- tlst.append('<?xml version="1.0" encoding="utf-8"?>\n')
- tlst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
- tlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >')
- tlst.append('<head>\n')
- tlst.append('<title>' + meta_array['Title'] + '</title>\n')
- tlst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
- tlst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
- if 'ASIN' in meta_array:
- tlst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
- if 'GUID' in meta_array:
- tlst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
- tlst.append('</head>\n')
- tlst.append('<body>\n')
- tlst.append('<h2>Table of Contents</h2>\n')
- start = pageidnums[0]
+def convert2SVG(gdict, flat_xml, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi):
+ mlst = []
+ pp = PParser(gdict, flat_xml, meta_array)
+ mlst.append('<?xml version="1.0" standalone="no"?>\n')
if (raw):
- startname = 'page%04d.svg' % start
+ mlst.append('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
+ mlst.append('<svg width="%fin" height="%fin" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1">\n' % (pp.pw / scaledpi, pp.ph / scaledpi, pp.pw -1, pp.ph -1))
+ mlst.append('<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors']))
else:
- startname = 'page%04d.xhtml' % start
-
- tlst.append('<h3><a href="' + startname + '">Start of Book</a></h3>\n')
- # build up a table of contents for the svg xhtml output
- tocentries = "".join(elst)
- elst = None
- toclst = tocentries.split('\n')
- toclst.pop()
- for entry in toclst:
- print entry
- title, pagenum = entry.split('|')
- id = pageidnums[int(pagenum)]
- if (raw):
- fname = 'page%04d.svg' % id
+ mlst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
+ mlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" ><head>\n')
+ mlst.append('<title>Page %d - %s by %s</title>\n' % (pageid, meta_array['Title'],meta_array['Authors']))
+ mlst.append('<script><![CDATA[\n')
+ mlst.append('function gd(){var p=window.location.href.replace(/^.*\?dpi=(\d+).*$/i,"$1");return p;}\n')
+ mlst.append('var dpi=%d;\n' % scaledpi)
+ if (previd) :
+ mlst.append('var prevpage="page%04d.xhtml";\n' % (previd))
+ if (nextid) :
+ mlst.append('var nextpage="page%04d.xhtml";\n' % (nextid))
+ mlst.append('var pw=%d;var ph=%d;' % (pp.pw, pp.ph))
+ mlst.append('function zoomin(){dpi=dpi*(0.8);setsize();}\n')
+ mlst.append('function zoomout(){dpi=dpi*1.25;setsize();}\n')
+ mlst.append('function setsize(){var svg=document.getElementById("svgimg");var prev=document.getElementById("prevsvg");var next=document.getElementById("nextsvg");var width=(pw/dpi)+"in";var height=(ph/dpi)+"in";svg.setAttribute("width",width);svg.setAttribute("height",height);prev.setAttribute("height",height);prev.setAttribute("width","50px");next.setAttribute("height",height);next.setAttribute("width","50px");}\n')
+ mlst.append('function ppage(){window.location.href=prevpage+"?dpi="+Math.round(dpi);}\n')
+ mlst.append('function npage(){window.location.href=nextpage+"?dpi="+Math.round(dpi);}\n')
+ mlst.append('var gt=gd();if(gt>0){dpi=gt;}\n')
+ mlst.append('window.onload=setsize;\n')
+ mlst.append(']]></script>\n')
+ mlst.append('</head>\n')
+ mlst.append('<body onLoad="setsize();" style="background-color:#777;text-align:center;">\n')
+ mlst.append('<div style="white-space:nowrap;">\n')
+ if previd == None:
+ mlst.append('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
else:
- fname = 'page%04d.xhtml' % id
- tlst.append('<h3><a href="'+ fname + '">' + title + '</a></h3>\n')
- tlst.append('</body>\n')
- tlst.append('</html>\n')
- tochtml = "".join(tlst)
- file(os.path.join(svgDir, 'toc.xhtml'), 'wb').write(tochtml)
-
-
- # now create index_svg.xhtml that points to all required files
- slst = []
- slst.append('<?xml version="1.0" encoding="utf-8"?>\n')
- slst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
- slst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >')
- slst.append('<head>\n')
- slst.append('<title>' + meta_array['Title'] + '</title>\n')
- slst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
- slst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
- if 'ASIN' in meta_array:
- slst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
- if 'GUID' in meta_array:
- slst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
- slst.append('</head>\n')
- slst.append('<body>\n')
-
- print "Building svg images of each book page"
- slst.append('<h2>List of Pages</h2>\n')
- slst.append('<div>\n')
- idlst = sorted(pageIDMap.keys())
- numids = len(idlst)
- cnt = len(idlst)
- previd = None
- for j in range(cnt):
- pageid = idlst[j]
- if j < cnt - 1:
- nextid = idlst[j+1]
- else:
- nextid = None
- print '.',
- pagelst = pageIDMap[pageid]
- flst = []
- for page in pagelst:
- flst.append(xmllst[page])
- flat_svg = "".join(flst)
- flst=None
- svgxml = flatxml2svg.convert2SVG(gd, flat_svg, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi)
- if (raw) :
- pfile = open(os.path.join(svgDir,'page%04d.svg' % pageid),'w')
- slst.append('<a href="svg/page%04d.svg">Page %d</a>\n' % (pageid, pageid))
+ mlst.append('<a href="javascript:ppage();"><svg id="prevsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,150,95,5,95,295" fill="#AAAAAA" /></svg></a>\n')
+
+ mlst.append('<a href="javascript:npage();"><svg id="svgimg" viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" style="background-color:#FFF;border:1px solid black;">' % (pp.pw, pp.ph))
+ if (pp.gid != None):
+ mlst.append('<defs>\n')
+ gdefs = pp.getGlyphs()
+ for j in xrange(0,len(gdefs)):
+ mlst.append(gdefs[j])
+ mlst.append('</defs>\n')
+ img = pp.getImages()
+ if (img != None):
+ for j in xrange(0,len(img)):
+ mlst.append(img[j])
+ if (pp.gid != None):
+ for j in xrange(0,len(pp.gid)):
+ mlst.append('<use xlink:href="#gl%d" x="%d" y="%d" />\n' % (pp.gid[j], pp.gx[j], pp.gy[j]))
+ if (img == None or len(img) == 0) and (pp.gid == None or len(pp.gid) == 0):
+ xpos = "%d" % (pp.pw // 3)
+ ypos = "%d" % (pp.ph // 3)
+ mlst.append('<text x="' + xpos + '" y="' + ypos + '" font-size="' + meta_array['fontSize'] + '" font-family="Helvetica" stroke="black">This page intentionally left blank.</text>\n')
+ if (raw) :
+ mlst.append('</svg>')
+ else :
+ mlst.append('</svg></a>\n')
+ if nextid == None:
+ mlst.append('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"></svg></a>\n')
else :
- pfile = open(os.path.join(svgDir,'page%04d.xhtml' % pageid), 'w')
- slst.append('<a href="svg/page%04d.xhtml">Page %d</a>\n' % (pageid, pageid))
- previd = pageid
- pfile.write(svgxml)
- pfile.close()
- counter += 1
- slst.append('</div>\n')
- slst.append('<h2><a href="svg/toc.xhtml">Table of Contents</a></h2>\n')
- slst.append('</body>\n</html>\n')
- svgindex = "".join(slst)
- slst = None
- file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex)
-
- print " "
-
- # build the opf file
- opfname = os.path.join(bookDir, 'book.opf')
- olst = []
- olst.append('<?xml version="1.0" encoding="utf-8"?>\n')
- olst.append('<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n')
- # adding metadata
- olst.append(' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n')
- if 'GUID' in meta_array:
- olst.append(' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n')
- if 'ASIN' in meta_array:
- olst.append(' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n')
- if 'oASIN' in meta_array:
- olst.append(' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n')
- olst.append(' <dc:title>' + meta_array['Title'] + '</dc:title>\n')
- olst.append(' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n')
- olst.append(' <dc:language>en</dc:language>\n')
- olst.append(' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n')
- if isCover:
- olst.append(' <meta name="cover" content="bookcover"/>\n')
- olst.append(' </metadata>\n')
- olst.append('<manifest>\n')
- olst.append(' <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n')
- olst.append(' <item id="stylesheet" href="style.css" media-type="text/css"/>\n')
- # adding image files to manifest
- filenames = os.listdir(imgDir)
- filenames = sorted(filenames)
- for filename in filenames:
- imgname, imgext = os.path.splitext(filename)
- if imgext == '.jpg':
- imgext = 'jpeg'
- if imgext == '.svg':
- imgext = 'svg+xml'
- olst.append(' <item id="' + imgname + '" href="img/' + filename + '" media-type="image/' + imgext + '"/>\n')
- if isCover:
- olst.append(' <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n')
- olst.append('</manifest>\n')
- # adding spine
- olst.append('<spine>\n <itemref idref="book" />\n</spine>\n')
- if isCover:
- olst.append(' <guide>\n')
- olst.append(' <reference href="cover.jpg" type="cover" title="Cover"/>\n')
- olst.append(' </guide>\n')
- olst.append('</package>\n')
- opfstr = "".join(olst)
- olst = None
- file(opfname, 'wb').write(opfstr)
-
- print 'Processing Complete'
-
- return 0
-
-def usage():
- print "genbook.py generates a book from the extract Topaz Files"
- print "Usage:"
- print " genbook.py [-r] [-h [--fixed-image] <bookDir> "
- print " "
- print "Options:"
- print " -h : help - print this usage message"
- print " -r : generate raw svg files (not wrapped in xhtml)"
- print " --fixed-image : genearate any Fixed Area as an svg image in the html"
- print " "
-
-
-def main(argv):
- bookDir = ''
- if len(argv) == 0:
- argv = sys.argv
-
- try:
- opts, args = getopt.getopt(argv[1:], "rh:",["fixed-image"])
-
- except getopt.GetoptError, err:
- print str(err)
- usage()
- return 1
-
- if len(opts) == 0 and len(args) == 0 :
- usage()
- return 1
-
- raw = 0
- fixedimage = True
- for o, a in opts:
- if o =="-h":
- usage()
- return 0
- if o =="-r":
- raw = 1
- if o =="--fixed-image":
- fixedimage = True
-
- bookDir = args[0]
-
- rv = generateBook(bookDir, raw, fixedimage)
- return rv
-
-
-if __name__ == '__main__':
- sys.exit(main(''))
+ mlst.append('<a href="javascript:npage();"><svg id="nextsvg" viewBox="0 0 100 300" xmlns="http://www.w3.org/2000/svg" version="1.1" style="background-color:#777"><polygon points="5,5,5,295,95,150" fill="#AAAAAA" /></svg></a>\n')
+ mlst.append('</div>\n')
+ mlst.append('<div><a href="javascript:zoomin();">zoom in</a> - <a href="javascript:zoomout();">zoom out</a></div>\n')
+ mlst.append('</body>\n')
+ mlst.append('</html>\n')
+ return "".join(mlst)
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/genbook.py b/DeDRM_calibre_plugin/DeDRM_plugin/genbook.py
index ac73d1e..3ed925d 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/genbook.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/genbook.py
@@ -1,452 +1,721 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-from __future__ import with_statement
-
-# ignobleepub.pyw, version 3.8
-# Copyright © 2009-2010 by i♥cabbages
-
-# Released under the terms of the GNU General Public Licence, version 3
-# <http://www.gnu.org/licenses/>
-
-# Modified 2010–2013 by some_updates, DiapDealer and Apprentice Alf
-
-# Windows users: Before running this program, you must first install Python 2.6
-# from <http://www.python.org/download/> and PyCrypto from
-# <http://www.voidspace.org.uk/python/modules.shtml#pycrypto> (make sure to
-# install the version for Python 2.6). Save this script file as
-# ineptepub.pyw and double-click on it to run it.
-#
-# Mac OS X users: Save this script file as ineptepub.pyw. You can run this
-# program from the command line (pythonw ineptepub.pyw) or by double-clicking
-# it when it has been associated with PythonLauncher.
-
-# Revision history:
-# 1 - Initial release
-# 2 - Added OS X support by using OpenSSL when available
-# 3 - screen out improper key lengths to prevent segfaults on Linux
-# 3.1 - Allow Windows versions of libcrypto to be found
-# 3.2 - add support for encoding to 'utf-8' when building up list of files to decrypt from encryption.xml
-# 3.3 - On Windows try PyCrypto first, OpenSSL next
-# 3.4 - Modify interface to allow use with import
-# 3.5 - Fix for potential problem with PyCrypto
-# 3.6 - Revised to allow use in calibre plugins to eliminate need for duplicate code
-# 3.7 - Tweaked to match ineptepub more closely
-# 3.8 - Fixed to retain zip file metadata (e.g. file modification date)
-# 3.9 - moved unicode_argv call inside main for Windows DeDRM compatibility
-# 4.0 - Work if TkInter is missing
-
-"""
-Decrypt Barnes & Noble encrypted ePub books.
-"""
-
-__license__ = 'GPL v3'
-__version__ = "4.0"
+#! /usr/bin/python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-import sys
-import os
-import traceback
-import zlib
-import zipfile
-from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED
-from contextlib import closing
-import xml.etree.ElementTree as etree
-
-# Wrap a stream so that output gets flushed immediately
-# and also make sure that any unicode strings get
-# encoded using "replace" before writing them.
-class SafeUnbuffered:
+class Unbuffered:
def __init__(self, stream):
self.stream = stream
- self.encoding = stream.encoding
- if self.encoding == None:
- self.encoding = "utf-8"
def write(self, data):
- if isinstance(data,unicode):
- data = data.encode(self.encoding,"replace")
self.stream.write(data)
self.stream.flush()
def __getattr__(self, attr):
return getattr(self.stream, attr)
-try:
- from calibre.constants import iswindows, isosx
-except:
- iswindows = sys.platform.startswith('win')
- isosx = sys.platform.startswith('darwin')
-
-def unicode_argv():
- if iswindows:
- # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
- # strings.
-
- # Versions 2.x of Python don't support Unicode in sys.argv on
- # Windows, with the underlying Windows API instead replacing multi-byte
- # characters with '?'.
-
-
- from ctypes import POINTER, byref, cdll, c_int, windll
- from ctypes.wintypes import LPCWSTR, LPWSTR
-
- GetCommandLineW = cdll.kernel32.GetCommandLineW
- GetCommandLineW.argtypes = []
- GetCommandLineW.restype = LPCWSTR
-
- CommandLineToArgvW = windll.shell32.CommandLineToArgvW
- CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
- CommandLineToArgvW.restype = POINTER(LPWSTR)
-
- cmd = GetCommandLineW()
- argc = c_int(0)
- argv = CommandLineToArgvW(cmd, byref(argc))
- if argc.value > 0:
- # Remove Python executable and commands if present
- start = argc.value - len(sys.argv)
- return [argv[i] for i in
- xrange(start, argc.value)]
- return [u"ineptepub.py"]
- else:
- argvencoding = sys.stdin.encoding
- if argvencoding == None:
- argvencoding = "utf-8"
- return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
+import sys
+sys.stdout=Unbuffered(sys.stdout)
+import csv
+import os
+import getopt
+from struct import pack
+from struct import unpack
-class IGNOBLEError(Exception):
+class TpzDRMError(Exception):
pass
-def _load_crypto_libcrypto():
- from ctypes import CDLL, POINTER, c_void_p, c_char_p, c_int, c_long, \
- Structure, c_ulong, create_string_buffer, cast
- from ctypes.util import find_library
+# local support routines
+if 'calibre' in sys.modules:
+ inCalibre = True
+else:
+ inCalibre = False
+
+if inCalibre :
+ from calibre_plugins.dedrm import convert2xml
+ from calibre_plugins.dedrm import flatxml2html
+ from calibre_plugins.dedrm import flatxml2svg
+ from calibre_plugins.dedrm import stylexml2css
+else :
+ import convert2xml
+ import flatxml2html
+ import flatxml2svg
+ import stylexml2css
+
+# global switch
+buildXML = False
+
+# Get a 7 bit encoded number from a file
+def readEncodedNumber(file):
+ flag = False
+ c = file.read(1)
+ if (len(c) == 0):
+ return None
+ data = ord(c)
+ if data == 0xFF:
+ flag = True
+ c = file.read(1)
+ if (len(c) == 0):
+ return None
+ data = ord(c)
+ if data >= 0x80:
+ datax = (data & 0x7F)
+ while data >= 0x80 :
+ c = file.read(1)
+ if (len(c) == 0):
+ return None
+ data = ord(c)
+ datax = (datax <<7) + (data & 0x7F)
+ data = datax
+ if flag:
+ data = -data
+ return data
+
+# Get a length prefixed string from the file
+def lengthPrefixString(data):
+ return encodeNumber(len(data))+data
+
+def readString(file):
+ stringLength = readEncodedNumber(file)
+ if (stringLength == None):
+ return None
+ sv = file.read(stringLength)
+ if (len(sv) != stringLength):
+ return ""
+ return unpack(str(stringLength)+"s",sv)[0]
+
+def getMetaArray(metaFile):
+ # parse the meta file
+ result = {}
+ fo = file(metaFile,'rb')
+ size = readEncodedNumber(fo)
+ for i in xrange(size):
+ tag = readString(fo)
+ value = readString(fo)
+ result[tag] = value
+ # print tag, value
+ fo.close()
+ return result
+
+
+# dictionary of all text strings by index value
+class Dictionary(object):
+ def __init__(self, dictFile):
+ self.filename = dictFile
+ self.size = 0
+ self.fo = file(dictFile,'rb')
+ self.stable = []
+ self.size = readEncodedNumber(self.fo)
+ for i in xrange(self.size):
+ self.stable.append(self.escapestr(readString(self.fo)))
+ self.pos = 0
+ def escapestr(self, str):
+ str = str.replace('&','&amp;')
+ str = str.replace('<','&lt;')
+ str = str.replace('>','&gt;')
+ str = str.replace('=','&#61;')
+ return str
+ def lookup(self,val):
+ if ((val >= 0) and (val < self.size)) :
+ self.pos = val
+ return self.stable[self.pos]
+ else:
+ print "Error: %d outside of string table limits" % val
+ raise TpzDRMError('outside or string table limits')
+ # sys.exit(-1)
+ def getSize(self):
+ return self.size
+ def getPos(self):
+ return self.pos
+
+
+class PageDimParser(object):
+ def __init__(self, flatxml):
+ self.flatdoc = flatxml.split('\n')
+ # find tag if within pos to end inclusive
+ def findinDoc(self, tagpath, pos, end) :
+ result = None
+ docList = self.flatdoc
+ cnt = len(docList)
+ if end == -1 :
+ end = cnt
+ else:
+ end = min(cnt,end)
+ foundat = -1
+ for j in xrange(pos, end):
+ item = docList[j]
+ if item.find('=') >= 0:
+ (name, argres) = item.split('=')
+ else :
+ name = item
+ argres = ''
+ if name.endswith(tagpath) :
+ result = argres
+ foundat = j
+ break
+ return foundat, result
+ def process(self):
+ (pos, sph) = self.findinDoc('page.h',0,-1)
+ (pos, spw) = self.findinDoc('page.w',0,-1)
+ if (sph == None): sph = '-1'
+ if (spw == None): spw = '-1'
+ return sph, spw
+
+def getPageDim(flatxml):
+ # create a document parser
+ dp = PageDimParser(flatxml)
+ (ph, pw) = dp.process()
+ return ph, pw
+
+class GParser(object):
+ def __init__(self, flatxml):
+ self.flatdoc = flatxml.split('\n')
+ self.dpi = 1440
+ self.gh = self.getData('info.glyph.h')
+ self.gw = self.getData('info.glyph.w')
+ self.guse = self.getData('info.glyph.use')
+ if self.guse :
+ self.count = len(self.guse)
+ else :
+ self.count = 0
+ self.gvtx = self.getData('info.glyph.vtx')
+ self.glen = self.getData('info.glyph.len')
+ self.gdpi = self.getData('info.glyph.dpi')
+ self.vx = self.getData('info.vtx.x')
+ self.vy = self.getData('info.vtx.y')
+ self.vlen = self.getData('info.len.n')
+ if self.vlen :
+ self.glen.append(len(self.vlen))
+ elif self.glen:
+ self.glen.append(0)
+ if self.vx :
+ self.gvtx.append(len(self.vx))
+ elif self.gvtx :
+ self.gvtx.append(0)
+ def getData(self, path):
+ result = None
+ cnt = len(self.flatdoc)
+ for j in xrange(cnt):
+ item = self.flatdoc[j]
+ if item.find('=') >= 0:
+ (name, argt) = item.split('=')
+ argres = argt.split('|')
+ else:
+ name = item
+ argres = []
+ if (name == path):
+ result = argres
+ break
+ if (len(argres) > 0) :
+ for j in xrange(0,len(argres)):
+ argres[j] = int(argres[j])
+ return result
+ def getGlyphDim(self, gly):
+ if self.gdpi[gly] == 0:
+ return 0, 0
+ maxh = (self.gh[gly] * self.dpi) / self.gdpi[gly]
+ maxw = (self.gw[gly] * self.dpi) / self.gdpi[gly]
+ return maxh, maxw
+ def getPath(self, gly):
+ path = ''
+ if (gly < 0) or (gly >= self.count):
+ return path
+ tx = self.vx[self.gvtx[gly]:self.gvtx[gly+1]]
+ ty = self.vy[self.gvtx[gly]:self.gvtx[gly+1]]
+ p = 0
+ for k in xrange(self.glen[gly], self.glen[gly+1]):
+ if (p == 0):
+ zx = tx[0:self.vlen[k]+1]
+ zy = ty[0:self.vlen[k]+1]
+ else:
+ zx = tx[self.vlen[k-1]+1:self.vlen[k]+1]
+ zy = ty[self.vlen[k-1]+1:self.vlen[k]+1]
+ p += 1
+ j = 0
+ while ( j < len(zx) ):
+ if (j == 0):
+ # Start Position.
+ path += 'M %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly])
+ elif (j <= len(zx)-3):
+ # Cubic Bezier Curve
+ path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[j+2] * self.dpi / self.gdpi[gly], zy[j+2] * self.dpi / self.gdpi[gly])
+ j += 2
+ elif (j == len(zx)-2):
+ # Cubic Bezier Curve to Start Position
+ path += 'C %d %d %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[j+1] * self.dpi / self.gdpi[gly], zy[j+1] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
+ j += 1
+ elif (j == len(zx)-1):
+ # Quadratic Bezier Curve to Start Position
+ path += 'Q %d %d %d %d ' % (zx[j] * self.dpi / self.gdpi[gly], zy[j] * self.dpi / self.gdpi[gly], zx[0] * self.dpi / self.gdpi[gly], zy[0] * self.dpi / self.gdpi[gly])
+
+ j += 1
+ path += 'z'
+ return path
+
+
+
+# dictionary of all text strings by index value
+class GlyphDict(object):
+ def __init__(self):
+ self.gdict = {}
+ def lookup(self, id):
+ # id='id="gl%d"' % val
+ if id in self.gdict:
+ return self.gdict[id]
+ return None
+ def addGlyph(self, val, path):
+ id='id="gl%d"' % val
+ self.gdict[id] = path
+
+
+def generateBook(bookDir, raw, fixedimage):
+ # sanity check Topaz file extraction
+ if not os.path.exists(bookDir) :
+ print "Can not find directory with unencrypted book"
+ return 1
+
+ dictFile = os.path.join(bookDir,'dict0000.dat')
+ if not os.path.exists(dictFile) :
+ print "Can not find dict0000.dat file"
+ return 1
+
+ pageDir = os.path.join(bookDir,'page')
+ if not os.path.exists(pageDir) :
+ print "Can not find page directory in unencrypted book"
+ return 1
+
+ imgDir = os.path.join(bookDir,'img')
+ if not os.path.exists(imgDir) :
+ print "Can not find image directory in unencrypted book"
+ return 1
- if iswindows:
- libcrypto = find_library('libeay32')
+ glyphsDir = os.path.join(bookDir,'glyphs')
+ if not os.path.exists(glyphsDir) :
+ print "Can not find glyphs directory in unencrypted book"
+ return 1
+
+ metaFile = os.path.join(bookDir,'metadata0000.dat')
+ if not os.path.exists(metaFile) :
+ print "Can not find metadata0000.dat in unencrypted book"
+ return 1
+
+ svgDir = os.path.join(bookDir,'svg')
+ if not os.path.exists(svgDir) :
+ os.makedirs(svgDir)
+
+ if buildXML:
+ xmlDir = os.path.join(bookDir,'xml')
+ if not os.path.exists(xmlDir) :
+ os.makedirs(xmlDir)
+
+ otherFile = os.path.join(bookDir,'other0000.dat')
+ if not os.path.exists(otherFile) :
+ print "Can not find other0000.dat in unencrypted book"
+ return 1
+
+ print "Updating to color images if available"
+ spath = os.path.join(bookDir,'color_img')
+ dpath = os.path.join(bookDir,'img')
+ filenames = os.listdir(spath)
+ filenames = sorted(filenames)
+ for filename in filenames:
+ imgname = filename.replace('color','img')
+ sfile = os.path.join(spath,filename)
+ dfile = os.path.join(dpath,imgname)
+ imgdata = file(sfile,'rb').read()
+ file(dfile,'wb').write(imgdata)
+
+ print "Creating cover.jpg"
+ isCover = False
+ cpath = os.path.join(bookDir,'img')
+ cpath = os.path.join(cpath,'img0000.jpg')
+ if os.path.isfile(cpath):
+ cover = file(cpath, 'rb').read()
+ cpath = os.path.join(bookDir,'cover.jpg')
+ file(cpath, 'wb').write(cover)
+ isCover = True
+
+
+ print 'Processing Dictionary'
+ dict = Dictionary(dictFile)
+
+ print 'Processing Meta Data and creating OPF'
+ meta_array = getMetaArray(metaFile)
+
+ # replace special chars in title and authors like & < >
+ title = meta_array.get('Title','No Title Provided')
+ title = title.replace('&','&amp;')
+ title = title.replace('<','&lt;')
+ title = title.replace('>','&gt;')
+ meta_array['Title'] = title
+ authors = meta_array.get('Authors','No Authors Provided')
+ authors = authors.replace('&','&amp;')
+ authors = authors.replace('<','&lt;')
+ authors = authors.replace('>','&gt;')
+ meta_array['Authors'] = authors
+
+ if buildXML:
+ xname = os.path.join(xmlDir, 'metadata.xml')
+ mlst = []
+ for key in meta_array:
+ mlst.append('<meta name="' + key + '" content="' + meta_array[key] + '" />\n')
+ metastr = "".join(mlst)
+ mlst = None
+ file(xname, 'wb').write(metastr)
+
+ print 'Processing StyleSheet'
+
+ # get some scaling info from metadata to use while processing styles
+ # and first page info
+
+ fontsize = '135'
+ if 'fontSize' in meta_array:
+ fontsize = meta_array['fontSize']
+
+ # also get the size of a normal text page
+ # get the total number of pages unpacked as a safety check
+ filenames = os.listdir(pageDir)
+ numfiles = len(filenames)
+
+ spage = '1'
+ if 'firstTextPage' in meta_array:
+ spage = meta_array['firstTextPage']
+ pnum = int(spage)
+ if pnum >= numfiles or pnum < 0:
+ # metadata is wrong so just select a page near the front
+ # 10% of the book to get a normal text page
+ pnum = int(0.10 * numfiles)
+ # print "first normal text page is", spage
+
+ # get page height and width from first text page for use in stylesheet scaling
+ pname = 'page%04d.dat' % (pnum + 1)
+ fname = os.path.join(pageDir,pname)
+ flat_xml = convert2xml.fromData(dict, fname)
+
+ (ph, pw) = getPageDim(flat_xml)
+ if (ph == '-1') or (ph == '0') : ph = '11000'
+ if (pw == '-1') or (pw == '0') : pw = '8500'
+ meta_array['pageHeight'] = ph
+ meta_array['pageWidth'] = pw
+ if 'fontSize' not in meta_array.keys():
+ meta_array['fontSize'] = fontsize
+
+ # process other.dat for css info and for map of page files to svg images
+ # this map is needed because some pages actually are made up of multiple
+ # pageXXXX.xml files
+ xname = os.path.join(bookDir, 'style.css')
+ flat_xml = convert2xml.fromData(dict, otherFile)
+
+ # extract info.original.pid to get original page information
+ pageIDMap = {}
+ pageidnums = stylexml2css.getpageIDMap(flat_xml)
+ if len(pageidnums) == 0:
+ filenames = os.listdir(pageDir)
+ numfiles = len(filenames)
+ for k in range(numfiles):
+ pageidnums.append(k)
+ # create a map from page ids to list of page file nums to process for that page
+ for i in range(len(pageidnums)):
+ id = pageidnums[i]
+ if id in pageIDMap.keys():
+ pageIDMap[id].append(i)
+ else:
+ pageIDMap[id] = [i]
+
+ # now get the css info
+ cssstr , classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
+ file(xname, 'wb').write(cssstr)
+ if buildXML:
+ xname = os.path.join(xmlDir, 'other0000.xml')
+ file(xname, 'wb').write(convert2xml.getXML(dict, otherFile))
+
+ print 'Processing Glyphs'
+ gd = GlyphDict()
+ filenames = os.listdir(glyphsDir)
+ filenames = sorted(filenames)
+ glyfname = os.path.join(svgDir,'glyphs.svg')
+ glyfile = open(glyfname, 'w')
+ glyfile.write('<?xml version="1.0" standalone="no"?>\n')
+ glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C/DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
+ glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
+ glyfile.write('<title>Glyphs for %s</title>\n' % meta_array['Title'])
+ glyfile.write('<defs>\n')
+ counter = 0
+ for filename in filenames:
+ # print ' ', filename
+ print '.',
+ fname = os.path.join(glyphsDir,filename)
+ flat_xml = convert2xml.fromData(dict, fname)
+
+ if buildXML:
+ xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
+ file(xname, 'wb').write(convert2xml.getXML(dict, fname))
+
+ gp = GParser(flat_xml)
+ for i in xrange(0, gp.count):
+ path = gp.getPath(i)
+ maxh, maxw = gp.getGlyphDim(i)
+ fullpath = '<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (counter * 256 + i, path, maxw, maxh)
+ glyfile.write(fullpath)
+ gd.addGlyph(counter * 256 + i, fullpath)
+ counter += 1
+ glyfile.write('</defs>\n')
+ glyfile.write('</svg>\n')
+ glyfile.close()
+ print " "
+
+
+ # start up the html
+ # also build up tocentries while processing html
+ htmlFileName = "book.html"
+ hlst = []
+ hlst.append('<?xml version="1.0" encoding="utf-8"?>\n')
+ hlst.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n')
+ hlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n')
+ hlst.append('<head>\n')
+ hlst.append('<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n')
+ hlst.append('<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n')
+ hlst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
+ hlst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
+ if 'ASIN' in meta_array:
+ hlst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
+ if 'GUID' in meta_array:
+ hlst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
+ hlst.append('<link href="style.css" rel="stylesheet" type="text/css" />\n')
+ hlst.append('</head>\n<body>\n')
+
+ print 'Processing Pages'
+ # Books are at 1440 DPI. This is rendering at twice that size for
+ # readability when rendering to the screen.
+ scaledpi = 1440.0
+
+ filenames = os.listdir(pageDir)
+ filenames = sorted(filenames)
+ numfiles = len(filenames)
+
+ xmllst = []
+ elst = []
+
+ for filename in filenames:
+ # print ' ', filename
+ print ".",
+ fname = os.path.join(pageDir,filename)
+ flat_xml = convert2xml.fromData(dict, fname)
+
+ # keep flat_xml for later svg processing
+ xmllst.append(flat_xml)
+
+ if buildXML:
+ xname = os.path.join(xmlDir, filename.replace('.dat','.xml'))
+ file(xname, 'wb').write(convert2xml.getXML(dict, fname))
+
+ # first get the html
+ pagehtml, tocinfo = flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage)
+ elst.append(tocinfo)
+ hlst.append(pagehtml)
+
+ # finish up the html string and output it
+ hlst.append('</body>\n</html>\n')
+ htmlstr = "".join(hlst)
+ hlst = None
+ file(os.path.join(bookDir, htmlFileName), 'wb').write(htmlstr)
+
+ print " "
+ print 'Extracting Table of Contents from Amazon OCR'
+
+ # first create a table of contents file for the svg images
+ tlst = []
+ tlst.append('<?xml version="1.0" encoding="utf-8"?>\n')
+ tlst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
+ tlst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >')
+ tlst.append('<head>\n')
+ tlst.append('<title>' + meta_array['Title'] + '</title>\n')
+ tlst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
+ tlst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
+ if 'ASIN' in meta_array:
+ tlst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
+ if 'GUID' in meta_array:
+ tlst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
+ tlst.append('</head>\n')
+ tlst.append('<body>\n')
+
+ tlst.append('<h2>Table of Contents</h2>\n')
+ start = pageidnums[0]
+ if (raw):
+ startname = 'page%04d.svg' % start
else:
- libcrypto = find_library('crypto')
-
- if libcrypto is None:
- raise IGNOBLEError('libcrypto not found')
- libcrypto = CDLL(libcrypto)
-
- AES_MAXNR = 14
-
- c_char_pp = POINTER(c_char_p)
- c_int_p = POINTER(c_int)
-
- class AES_KEY(Structure):
- _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))),
- ('rounds', c_int)]
- AES_KEY_p = POINTER(AES_KEY)
-
- def F(restype, name, argtypes):
- func = getattr(libcrypto, name)
- func.restype = restype
- func.argtypes = argtypes
- return func
-
- AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',
- [c_char_p, c_int, AES_KEY_p])
- AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',
- [c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,
- c_int])
-
- class AES(object):
- def __init__(self, userkey):
- self._blocksize = len(userkey)
- if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
- raise IGNOBLEError('AES improper key used')
- return
- key = self._key = AES_KEY()
- rv = AES_set_decrypt_key(userkey, len(userkey) * 8, key)
- if rv < 0:
- raise IGNOBLEError('Failed to initialize AES key')
-
- def decrypt(self, data):
- out = create_string_buffer(len(data))
- iv = ("\x00" * self._blocksize)
- rv = AES_cbc_encrypt(data, out, len(data), self._key, iv, 0)
- if rv == 0:
- raise IGNOBLEError('AES decryption failed')
- return out.raw
-
- return AES
-
-def _load_crypto_pycrypto():
- from Crypto.Cipher import AES as _AES
-
- class AES(object):
- def __init__(self, key):
- self._aes = _AES.new(key, _AES.MODE_CBC, '\x00'*16)
-
- def decrypt(self, data):
- return self._aes.decrypt(data)
-
- return AES
-
-def _load_crypto():
- AES = None
- cryptolist = (_load_crypto_libcrypto, _load_crypto_pycrypto)
- if sys.platform.startswith('win'):
- cryptolist = (_load_crypto_pycrypto, _load_crypto_libcrypto)
- for loader in cryptolist:
- try:
- AES = loader()
- break
- except (ImportError, IGNOBLEError):
- pass
- return AES
-
-AES = _load_crypto()
-
-META_NAMES = ('mimetype', 'META-INF/rights.xml', 'META-INF/encryption.xml')
-NSMAP = {'adept': 'http://ns.adobe.com/adept',
- 'enc': 'http://www.w3.org/2001/04/xmlenc#'}
-
-class Decryptor(object):
- def __init__(self, bookkey, encryption):
- enc = lambda tag: '{%s}%s' % (NSMAP['enc'], tag)
- self._aes = AES(bookkey)
- encryption = etree.fromstring(encryption)
- self._encrypted = encrypted = set()
- expr = './%s/%s/%s' % (enc('EncryptedData'), enc('CipherData'),
- enc('CipherReference'))
- for elem in encryption.findall(expr):
- path = elem.get('URI', None)
- if path is not None:
- path = path.encode('utf-8')
- encrypted.add(path)
-
- def decompress(self, bytes):
- dc = zlib.decompressobj(-15)
- bytes = dc.decompress(bytes)
- ex = dc.decompress('Z') + dc.flush()
- if ex:
- bytes = bytes + ex
- return bytes
-
- def decrypt(self, path, data):
- if path in self._encrypted:
- data = self._aes.decrypt(data)[16:]
- data = data[:-ord(data[-1])]
- data = self.decompress(data)
- return data
-
-# check file to make check whether it's probably an Adobe Adept encrypted ePub
-def ignobleBook(inpath):
- with closing(ZipFile(open(inpath, 'rb'))) as inf:
- namelist = set(inf.namelist())
- if 'META-INF/rights.xml' not in namelist or \
- 'META-INF/encryption.xml' not in namelist:
- return False
- try:
- rights = etree.fromstring(inf.read('META-INF/rights.xml'))
- adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
- expr = './/%s' % (adept('encryptedKey'),)
- bookkey = ''.join(rights.findtext(expr))
- if len(bookkey) == 64:
- return True
- except:
- # if we couldn't check, assume it is
- return True
- return False
-
-def decryptBook(keyb64, inpath, outpath):
- if AES is None:
- raise IGNOBLEError(u"PyCrypto or OpenSSL must be installed.")
- key = keyb64.decode('base64')[:16]
- aes = AES(key)
- with closing(ZipFile(open(inpath, 'rb'))) as inf:
- namelist = set(inf.namelist())
- if 'META-INF/rights.xml' not in namelist or \
- 'META-INF/encryption.xml' not in namelist:
- print u"{0:s} is DRM-free.".format(os.path.basename(inpath))
- return 1
- for name in META_NAMES:
- namelist.remove(name)
- try:
- rights = etree.fromstring(inf.read('META-INF/rights.xml'))
- adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
- expr = './/%s' % (adept('encryptedKey'),)
- bookkey = ''.join(rights.findtext(expr))
- if len(bookkey) != 64:
- print u"{0:s} is not a secure Barnes & Noble ePub.".format(os.path.basename(inpath))
- return 1
- bookkey = aes.decrypt(bookkey.decode('base64'))
- bookkey = bookkey[:-ord(bookkey[-1])]
- encryption = inf.read('META-INF/encryption.xml')
- decryptor = Decryptor(bookkey[-16:], encryption)
- kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
- with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf:
- zi = ZipInfo('mimetype')
- zi.compress_type=ZIP_STORED
- try:
- # if the mimetype is present, get its info, including time-stamp
- oldzi = inf.getinfo('mimetype')
- # copy across fields to be preserved
- zi.date_time = oldzi.date_time
- zi.comment = oldzi.comment
- zi.extra = oldzi.extra
- zi.internal_attr = oldzi.internal_attr
- # external attributes are dependent on the create system, so copy both.
- zi.external_attr = oldzi.external_attr
- zi.create_system = oldzi.create_system
- except:
- pass
- outf.writestr(zi, inf.read('mimetype'))
- for path in namelist:
- data = inf.read(path)
- zi = ZipInfo(path)
- zi.compress_type=ZIP_DEFLATED
- try:
- # get the file info, including time-stamp
- oldzi = inf.getinfo(path)
- # copy across useful fields
- zi.date_time = oldzi.date_time
- zi.comment = oldzi.comment
- zi.extra = oldzi.extra
- zi.internal_attr = oldzi.internal_attr
- # external attributes are dependent on the create system, so copy both.
- zi.external_attr = oldzi.external_attr
- zi.create_system = oldzi.create_system
- except:
- pass
- outf.writestr(zi, decryptor.decrypt(path, data))
- except:
- print u"Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc())
- return 2
+ startname = 'page%04d.xhtml' % start
+
+ tlst.append('<h3><a href="' + startname + '">Start of Book</a></h3>\n')
+ # build up a table of contents for the svg xhtml output
+ tocentries = "".join(elst)
+ elst = None
+ toclst = tocentries.split('\n')
+ toclst.pop()
+ for entry in toclst:
+ print entry
+ title, pagenum = entry.split('|')
+ id = pageidnums[int(pagenum)]
+ if (raw):
+ fname = 'page%04d.svg' % id
+ else:
+ fname = 'page%04d.xhtml' % id
+ tlst.append('<h3><a href="'+ fname + '">' + title + '</a></h3>\n')
+ tlst.append('</body>\n')
+ tlst.append('</html>\n')
+ tochtml = "".join(tlst)
+ file(os.path.join(svgDir, 'toc.xhtml'), 'wb').write(tochtml)
+
+
+ # now create index_svg.xhtml that points to all required files
+ slst = []
+ slst.append('<?xml version="1.0" encoding="utf-8"?>\n')
+ slst.append('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
+ slst.append('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >')
+ slst.append('<head>\n')
+ slst.append('<title>' + meta_array['Title'] + '</title>\n')
+ slst.append('<meta name="Author" content="' + meta_array['Authors'] + '" />\n')
+ slst.append('<meta name="Title" content="' + meta_array['Title'] + '" />\n')
+ if 'ASIN' in meta_array:
+ slst.append('<meta name="ASIN" content="' + meta_array['ASIN'] + '" />\n')
+ if 'GUID' in meta_array:
+ slst.append('<meta name="GUID" content="' + meta_array['GUID'] + '" />\n')
+ slst.append('</head>\n')
+ slst.append('<body>\n')
+
+ print "Building svg images of each book page"
+ slst.append('<h2>List of Pages</h2>\n')
+ slst.append('<div>\n')
+ idlst = sorted(pageIDMap.keys())
+ numids = len(idlst)
+ cnt = len(idlst)
+ previd = None
+ for j in range(cnt):
+ pageid = idlst[j]
+ if j < cnt - 1:
+ nextid = idlst[j+1]
+ else:
+ nextid = None
+ print '.',
+ pagelst = pageIDMap[pageid]
+ flst = []
+ for page in pagelst:
+ flst.append(xmllst[page])
+ flat_svg = "".join(flst)
+ flst=None
+ svgxml = flatxml2svg.convert2SVG(gd, flat_svg, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi)
+ if (raw) :
+ pfile = open(os.path.join(svgDir,'page%04d.svg' % pageid),'w')
+ slst.append('<a href="svg/page%04d.svg">Page %d</a>\n' % (pageid, pageid))
+ else :
+ pfile = open(os.path.join(svgDir,'page%04d.xhtml' % pageid), 'w')
+ slst.append('<a href="svg/page%04d.xhtml">Page %d</a>\n' % (pageid, pageid))
+ previd = pageid
+ pfile.write(svgxml)
+ pfile.close()
+ counter += 1
+ slst.append('</div>\n')
+ slst.append('<h2><a href="svg/toc.xhtml">Table of Contents</a></h2>\n')
+ slst.append('</body>\n</html>\n')
+ svgindex = "".join(slst)
+ slst = None
+ file(os.path.join(bookDir, 'index_svg.xhtml'), 'wb').write(svgindex)
+
+ print " "
+
+ # build the opf file
+ opfname = os.path.join(bookDir, 'book.opf')
+ olst = []
+ olst.append('<?xml version="1.0" encoding="utf-8"?>\n')
+ olst.append('<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n')
+ # adding metadata
+ olst.append(' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n')
+ if 'GUID' in meta_array:
+ olst.append(' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n')
+ if 'ASIN' in meta_array:
+ olst.append(' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n')
+ if 'oASIN' in meta_array:
+ olst.append(' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n')
+ olst.append(' <dc:title>' + meta_array['Title'] + '</dc:title>\n')
+ olst.append(' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n')
+ olst.append(' <dc:language>en</dc:language>\n')
+ olst.append(' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n')
+ if isCover:
+ olst.append(' <meta name="cover" content="bookcover"/>\n')
+ olst.append(' </metadata>\n')
+ olst.append('<manifest>\n')
+ olst.append(' <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n')
+ olst.append(' <item id="stylesheet" href="style.css" media-type="text/css"/>\n')
+ # adding image files to manifest
+ filenames = os.listdir(imgDir)
+ filenames = sorted(filenames)
+ for filename in filenames:
+ imgname, imgext = os.path.splitext(filename)
+ if imgext == '.jpg':
+ imgext = 'jpeg'
+ if imgext == '.svg':
+ imgext = 'svg+xml'
+ olst.append(' <item id="' + imgname + '" href="img/' + filename + '" media-type="image/' + imgext + '"/>\n')
+ if isCover:
+ olst.append(' <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n')
+ olst.append('</manifest>\n')
+ # adding spine
+ olst.append('<spine>\n <itemref idref="book" />\n</spine>\n')
+ if isCover:
+ olst.append(' <guide>\n')
+ olst.append(' <reference href="cover.jpg" type="cover" title="Cover"/>\n')
+ olst.append(' </guide>\n')
+ olst.append('</package>\n')
+ opfstr = "".join(olst)
+ olst = None
+ file(opfname, 'wb').write(opfstr)
+
+ print 'Processing Complete'
+
return 0
+def usage():
+ print "genbook.py generates a book from the extract Topaz Files"
+ print "Usage:"
+ print " genbook.py [-r] [-h [--fixed-image] <bookDir> "
+ print " "
+ print "Options:"
+ print " -h : help - print this usage message"
+ print " -r : generate raw svg files (not wrapped in xhtml)"
+ print " --fixed-image : genearate any Fixed Area as an svg image in the html"
+ print " "
-def cli_main():
- sys.stdout=SafeUnbuffered(sys.stdout)
- sys.stderr=SafeUnbuffered(sys.stderr)
- argv=unicode_argv()
- progname = os.path.basename(argv[0])
- if len(argv) != 4:
- print u"usage: {0} <keyfile.b64> <inbook.epub> <outbook.epub>".format(progname)
- return 1
- keypath, inpath, outpath = argv[1:]
- userkey = open(keypath,'rb').read()
- result = decryptBook(userkey, inpath, outpath)
- if result == 0:
- print u"Successfully decrypted {0:s} as {1:s}".format(os.path.basename(inpath),os.path.basename(outpath))
- return result
-def gui_main():
+def main(argv):
+ bookDir = ''
+ if len(argv) == 0:
+ argv = sys.argv
+
try:
- import Tkinter
- import Tkconstants
- import tkMessageBox
- import traceback
- except:
- return cli_main()
-
- class DecryptionDialog(Tkinter.Frame):
- def __init__(self, root):
- Tkinter.Frame.__init__(self, root, border=5)
- self.status = Tkinter.Label(self, text=u"Select files for decryption")
- self.status.pack(fill=Tkconstants.X, expand=1)
- body = Tkinter.Frame(self)
- body.pack(fill=Tkconstants.X, expand=1)
- sticky = Tkconstants.E + Tkconstants.W
- body.grid_columnconfigure(1, weight=2)
- Tkinter.Label(body, text=u"Key file").grid(row=0)
- self.keypath = Tkinter.Entry(body, width=30)
- self.keypath.grid(row=0, column=1, sticky=sticky)
- if os.path.exists(u"bnepubkey.b64"):
- self.keypath.insert(0, u"bnepubkey.b64")
- button = Tkinter.Button(body, text=u"...", command=self.get_keypath)
- button.grid(row=0, column=2)
- Tkinter.Label(body, text=u"Input file").grid(row=1)
- self.inpath = Tkinter.Entry(body, width=30)
- self.inpath.grid(row=1, column=1, sticky=sticky)
- button = Tkinter.Button(body, text=u"...", command=self.get_inpath)
- button.grid(row=1, column=2)
- Tkinter.Label(body, text=u"Output file").grid(row=2)
- self.outpath = Tkinter.Entry(body, width=30)
- self.outpath.grid(row=2, column=1, sticky=sticky)
- button = Tkinter.Button(body, text=u"...", command=self.get_outpath)
- button.grid(row=2, column=2)
- buttons = Tkinter.Frame(self)
- buttons.pack()
- botton = Tkinter.Button(
- buttons, text=u"Decrypt", width=10, command=self.decrypt)
- botton.pack(side=Tkconstants.LEFT)
- Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
- button = Tkinter.Button(
- buttons, text=u"Quit", width=10, command=self.quit)
- button.pack(side=Tkconstants.RIGHT)
-
- def get_keypath(self):
- keypath = tkFileDialog.askopenfilename(
- parent=None, title=u"Select Barnes & Noble \'.b64\' key file",
- defaultextension=u".b64",
- filetypes=[('base64-encoded files', '.b64'),
- ('All Files', '.*')])
- if keypath:
- keypath = os.path.normpath(keypath)
- self.keypath.delete(0, Tkconstants.END)
- self.keypath.insert(0, keypath)
- return
-
- def get_inpath(self):
- inpath = tkFileDialog.askopenfilename(
- parent=None, title=u"Select B&N-encrypted ePub file to decrypt",
- defaultextension=u".epub", filetypes=[('ePub files', '.epub')])
- if inpath:
- inpath = os.path.normpath(inpath)
- self.inpath.delete(0, Tkconstants.END)
- self.inpath.insert(0, inpath)
- return
-
- def get_outpath(self):
- outpath = tkFileDialog.asksaveasfilename(
- parent=None, title=u"Select unencrypted ePub file to produce",
- defaultextension=u".epub", filetypes=[('ePub files', '.epub')])
- if outpath:
- outpath = os.path.normpath(outpath)
- self.outpath.delete(0, Tkconstants.END)
- self.outpath.insert(0, outpath)
- return
-
- def decrypt(self):
- keypath = self.keypath.get()
- inpath = self.inpath.get()
- outpath = self.outpath.get()
- if not keypath or not os.path.exists(keypath):
- self.status['text'] = u"Specified key file does not exist"
- return
- if not inpath or not os.path.exists(inpath):
- self.status['text'] = u"Specified input file does not exist"
- return
- if not outpath:
- self.status['text'] = u"Output file not specified"
- return
- if inpath == outpath:
- self.status['text'] = u"Must have different input and output files"
- return
- userkey = open(keypath,'rb').read()
- self.status['text'] = u"Decrypting..."
- try:
- decrypt_status = decryptBook(userkey, inpath, outpath)
- except Exception, e:
- self.status['text'] = u"Error: {0}".format(e.args[0])
- return
- if decrypt_status == 0:
- self.status['text'] = u"File successfully decrypted"
- else:
- self.status['text'] = u"The was an error decrypting the file."
-
- root = Tkinter.Tk()
- root.title(u"Barnes & Noble ePub Decrypter v.{0}".format(__version__))
- root.resizable(True, False)
- root.minsize(300, 0)
- DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1)
- root.mainloop()
- return 0
+ opts, args = getopt.getopt(argv[1:], "rh:",["fixed-image"])
+
+ except getopt.GetoptError, err:
+ print str(err)
+ usage()
+ return 1
+
+ if len(opts) == 0 and len(args) == 0 :
+ usage()
+ return 1
+
+ raw = 0
+ fixedimage = True
+ for o, a in opts:
+ if o =="-h":
+ usage()
+ return 0
+ if o =="-r":
+ raw = 1
+ if o =="--fixed-image":
+ fixedimage = True
+
+ bookDir = args[0]
+
+ rv = generateBook(bookDir, raw, fixedimage)
+ return rv
+
if __name__ == '__main__':
- if len(sys.argv) > 1:
- sys.exit(cli_main())
- sys.exit(gui_main())
+ sys.exit(main(''))
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/ignobleepub.py b/DeDRM_calibre_plugin/DeDRM_plugin/ignobleepub.py
index 5118c87..ac73d1e 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/ignobleepub.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/ignobleepub.py
@@ -3,47 +3,54 @@
from __future__ import with_statement
-# ignoblekeygen.pyw, version 2.5
-# Copyright © 2009-2010 i♥cabbages
+# ignobleepub.pyw, version 3.8
+# Copyright © 2009-2010 by i♥cabbages
# Released under the terms of the GNU General Public Licence, version 3
# <http://www.gnu.org/licenses/>
# Modified 2010–2013 by some_updates, DiapDealer and Apprentice Alf
-# Windows users: Before running this program, you must first install Python.
-# We recommend ActiveState Python 2.7.X for Windows (x86) from
-# http://www.activestate.com/activepython/downloads.
-# You must also install PyCrypto from
-# http://www.voidspace.org.uk/python/modules.shtml#pycrypto
-# (make certain to install the version for Python 2.7).
-# Then save this script file as ignoblekeygen.pyw and double-click on it to run it.
+# Windows users: Before running this program, you must first install Python 2.6
+# from <http://www.python.org/download/> and PyCrypto from
+# <http://www.voidspace.org.uk/python/modules.shtml#pycrypto> (make sure to
+# install the version for Python 2.6). Save this script file as
+# ineptepub.pyw and double-click on it to run it.
#
-# Mac OS X users: Save this script file as ignoblekeygen.pyw. You can run this
-# program from the command line (python ignoblekeygen.pyw) or by double-clicking
+# Mac OS X users: Save this script file as ineptepub.pyw. You can run this
+# program from the command line (pythonw ineptepub.pyw) or by double-clicking
# it when it has been associated with PythonLauncher.
# Revision history:
# 1 - Initial release
-# 2 - Add OS X support by using OpenSSL when available (taken/modified from ineptepub v5)
-# 2.1 - Allow Windows versions of libcrypto to be found
-# 2.2 - On Windows try PyCrypto first and then OpenSSL next
-# 2.3 - Modify interface to allow use of import
-# 2.4 - Improvements to UI and now works in plugins
-# 2.5 - Additional improvement for unicode and plugin support
-# 2.6 - moved unicode_argv call inside main for Windows DeDRM compatibility
-# 2.7 - Work if TkInter is missing
+# 2 - Added OS X support by using OpenSSL when available
+# 3 - screen out improper key lengths to prevent segfaults on Linux
+# 3.1 - Allow Windows versions of libcrypto to be found
+# 3.2 - add support for encoding to 'utf-8' when building up list of files to decrypt from encryption.xml
+# 3.3 - On Windows try PyCrypto first, OpenSSL next
+# 3.4 - Modify interface to allow use with import
+# 3.5 - Fix for potential problem with PyCrypto
+# 3.6 - Revised to allow use in calibre plugins to eliminate need for duplicate code
+# 3.7 - Tweaked to match ineptepub more closely
+# 3.8 - Fixed to retain zip file metadata (e.g. file modification date)
+# 3.9 - moved unicode_argv call inside main for Windows DeDRM compatibility
+# 4.0 - Work if TkInter is missing
"""
-Generate Barnes & Noble EPUB user key from name and credit card number.
+Decrypt Barnes & Noble encrypted ePub books.
"""
__license__ = 'GPL v3'
-__version__ = "2.7"
+__version__ = "4.0"
import sys
import os
-import hashlib
+import traceback
+import zlib
+import zipfile
+from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED
+from contextlib import closing
+import xml.etree.ElementTree as etree
# Wrap a stream so that output gets flushed immediately
# and also make sure that any unicode strings get
@@ -75,8 +82,8 @@ def unicode_argv():
# Versions 2.x of Python don't support Unicode in sys.argv on
# Windows, with the underlying Windows API instead replacing multi-byte
- # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv
- # as a list of Unicode strings and encode them as utf-8
+ # characters with '?'.
+
from ctypes import POINTER, byref, cdll, c_int, windll
from ctypes.wintypes import LPCWSTR, LPWSTR
@@ -97,9 +104,7 @@ def unicode_argv():
start = argc.value - len(sys.argv)
return [argv[i] for i in
xrange(start, argc.value)]
- # if we don't have any arguments at all, just pass back script name
- # this should never happen
- return [u"ignoblekeygen.py"]
+ return [u"ineptepub.py"]
else:
argvencoding = sys.stdin.encoding
if argvencoding == None:
@@ -140,26 +145,29 @@ def _load_crypto_libcrypto():
func.argtypes = argtypes
return func
- AES_set_encrypt_key = F(c_int, 'AES_set_encrypt_key',
+ AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',
[c_char_p, c_int, AES_KEY_p])
AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',
[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,
c_int])
class AES(object):
- def __init__(self, userkey, iv):
+ def __init__(self, userkey):
self._blocksize = len(userkey)
- self._iv = iv
+ if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
+ raise IGNOBLEError('AES improper key used')
+ return
key = self._key = AES_KEY()
- rv = AES_set_encrypt_key(userkey, len(userkey) * 8, key)
+ rv = AES_set_decrypt_key(userkey, len(userkey) * 8, key)
if rv < 0:
- raise IGNOBLEError('Failed to initialize AES Encrypt key')
+ raise IGNOBLEError('Failed to initialize AES key')
- def encrypt(self, data):
+ def decrypt(self, data):
out = create_string_buffer(len(data))
- rv = AES_cbc_encrypt(data, out, len(data), self._key, self._iv, 1)
+ iv = ("\x00" * self._blocksize)
+ rv = AES_cbc_encrypt(data, out, len(data), self._key, iv, 0)
if rv == 0:
- raise IGNOBLEError('AES encryption failed')
+ raise IGNOBLEError('AES decryption failed')
return out.raw
return AES
@@ -168,11 +176,11 @@ def _load_crypto_pycrypto():
from Crypto.Cipher import AES as _AES
class AES(object):
- def __init__(self, key, iv):
- self._aes = _AES.new(key, _AES.MODE_CBC, iv)
+ def __init__(self, key):
+ self._aes = _AES.new(key, _AES.MODE_CBC, '\x00'*16)
- def encrypt(self, data):
- return self._aes.encrypt(data)
+ def decrypt(self, data):
+ return self._aes.decrypt(data)
return AES
@@ -191,29 +199,123 @@ def _load_crypto():
AES = _load_crypto()
-def normalize_name(name):
- return ''.join(x for x in name.lower() if x != ' ')
-
-
-def generate_key(name, ccn):
- # remove spaces and case from name and CC numbers.
- if type(name)==unicode:
- name = name.encode('utf-8')
- if type(ccn)==unicode:
- ccn = ccn.encode('utf-8')
-
- name = normalize_name(name) + '\x00'
- ccn = normalize_name(ccn) + '\x00'
-
- name_sha = hashlib.sha1(name).digest()[:16]
- ccn_sha = hashlib.sha1(ccn).digest()[:16]
- both_sha = hashlib.sha1(name + ccn).digest()
- aes = AES(ccn_sha, name_sha)
- crypt = aes.encrypt(both_sha + ('\x0c' * 0x0c))
- userkey = hashlib.sha1(crypt).digest()
- return userkey.encode('base64')
-
-
+META_NAMES = ('mimetype', 'META-INF/rights.xml', 'META-INF/encryption.xml')
+NSMAP = {'adept': 'http://ns.adobe.com/adept',
+ 'enc': 'http://www.w3.org/2001/04/xmlenc#'}
+
+class Decryptor(object):
+ def __init__(self, bookkey, encryption):
+ enc = lambda tag: '{%s}%s' % (NSMAP['enc'], tag)
+ self._aes = AES(bookkey)
+ encryption = etree.fromstring(encryption)
+ self._encrypted = encrypted = set()
+ expr = './%s/%s/%s' % (enc('EncryptedData'), enc('CipherData'),
+ enc('CipherReference'))
+ for elem in encryption.findall(expr):
+ path = elem.get('URI', None)
+ if path is not None:
+ path = path.encode('utf-8')
+ encrypted.add(path)
+
+ def decompress(self, bytes):
+ dc = zlib.decompressobj(-15)
+ bytes = dc.decompress(bytes)
+ ex = dc.decompress('Z') + dc.flush()
+ if ex:
+ bytes = bytes + ex
+ return bytes
+
+ def decrypt(self, path, data):
+ if path in self._encrypted:
+ data = self._aes.decrypt(data)[16:]
+ data = data[:-ord(data[-1])]
+ data = self.decompress(data)
+ return data
+
+# check file to see whether it's probably a Barnes & Noble encrypted ePub
+def ignobleBook(inpath):
+ with closing(ZipFile(open(inpath, 'rb'))) as inf:
+ namelist = set(inf.namelist())
+ if 'META-INF/rights.xml' not in namelist or \
+ 'META-INF/encryption.xml' not in namelist:
+ return False
+ try:
+ rights = etree.fromstring(inf.read('META-INF/rights.xml'))
+ adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
+ expr = './/%s' % (adept('encryptedKey'),)
+ bookkey = ''.join(rights.findtext(expr))
+ if len(bookkey) == 64:
+ return True
+ except:
+ # if we couldn't check, assume it is
+ return True
+ return False
+
+def decryptBook(keyb64, inpath, outpath):
+ if AES is None:
+ raise IGNOBLEError(u"PyCrypto or OpenSSL must be installed.")
+ key = keyb64.decode('base64')[:16]
+ aes = AES(key)
+ with closing(ZipFile(open(inpath, 'rb'))) as inf:
+ namelist = set(inf.namelist())
+ if 'META-INF/rights.xml' not in namelist or \
+ 'META-INF/encryption.xml' not in namelist:
+ print u"{0:s} is DRM-free.".format(os.path.basename(inpath))
+ return 1
+ for name in META_NAMES:
+ namelist.remove(name)
+ try:
+ rights = etree.fromstring(inf.read('META-INF/rights.xml'))
+ adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
+ expr = './/%s' % (adept('encryptedKey'),)
+ bookkey = ''.join(rights.findtext(expr))
+ if len(bookkey) != 64:
+ print u"{0:s} is not a secure Barnes & Noble ePub.".format(os.path.basename(inpath))
+ return 1
+ bookkey = aes.decrypt(bookkey.decode('base64'))
+ bookkey = bookkey[:-ord(bookkey[-1])]
+ encryption = inf.read('META-INF/encryption.xml')
+ decryptor = Decryptor(bookkey[-16:], encryption)
+ kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
+ with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf:
+ zi = ZipInfo('mimetype')
+ zi.compress_type=ZIP_STORED
+ try:
+ # if the mimetype is present, get its info, including time-stamp
+ oldzi = inf.getinfo('mimetype')
+ # copy across fields to be preserved
+ zi.date_time = oldzi.date_time
+ zi.comment = oldzi.comment
+ zi.extra = oldzi.extra
+ zi.internal_attr = oldzi.internal_attr
+ # external attributes are dependent on the create system, so copy both.
+ zi.external_attr = oldzi.external_attr
+ zi.create_system = oldzi.create_system
+ except:
+ pass
+ outf.writestr(zi, inf.read('mimetype'))
+ for path in namelist:
+ data = inf.read(path)
+ zi = ZipInfo(path)
+ zi.compress_type=ZIP_DEFLATED
+ try:
+ # get the file info, including time-stamp
+ oldzi = inf.getinfo(path)
+ # copy across useful fields
+ zi.date_time = oldzi.date_time
+ zi.comment = oldzi.comment
+ zi.extra = oldzi.extra
+ zi.internal_attr = oldzi.internal_attr
+ # external attributes are dependent on the create system, so copy both.
+ zi.external_attr = oldzi.external_attr
+ zi.create_system = oldzi.create_system
+ except:
+ pass
+ outf.writestr(zi, decryptor.decrypt(path, data))
+ except:
+ print u"Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc())
+ return 2
+ return 0
def cli_main():
@@ -221,19 +323,15 @@ def cli_main():
sys.stderr=SafeUnbuffered(sys.stderr)
argv=unicode_argv()
progname = os.path.basename(argv[0])
- if AES is None:
- print "%s: This script requires OpenSSL or PyCrypto, which must be installed " \
- "separately. Read the top-of-script comment for details." % \
- (progname,)
- return 1
if len(argv) != 4:
- print u"usage: {0} <Name> <CC#> <keyfileout.b64>".format(progname)
+ print u"usage: {0} <keyfile.b64> <inbook.epub> <outbook.epub>".format(progname)
return 1
- name, ccn, keypath = argv[1:]
- userkey = generate_key(name, ccn)
- open(keypath,'wb').write(userkey)
- return 0
-
+ keypath, inpath, outpath = argv[1:]
+ userkey = open(keypath,'rb').read()
+ result = decryptBook(userkey, inpath, outpath)
+ if result == 0:
+ print u"Successfully decrypted {0:s} as {1:s}".format(os.path.basename(inpath),os.path.basename(outpath))
+ return result
def gui_main():
try:
@@ -247,28 +345,33 @@ def gui_main():
class DecryptionDialog(Tkinter.Frame):
def __init__(self, root):
Tkinter.Frame.__init__(self, root, border=5)
- self.status = Tkinter.Label(self, text=u"Enter parameters")
+ self.status = Tkinter.Label(self, text=u"Select files for decryption")
self.status.pack(fill=Tkconstants.X, expand=1)
body = Tkinter.Frame(self)
body.pack(fill=Tkconstants.X, expand=1)
sticky = Tkconstants.E + Tkconstants.W
body.grid_columnconfigure(1, weight=2)
- Tkinter.Label(body, text=u"Account Name").grid(row=0)
- self.name = Tkinter.Entry(body, width=40)
- self.name.grid(row=0, column=1, sticky=sticky)
- Tkinter.Label(body, text=u"CC#").grid(row=1)
- self.ccn = Tkinter.Entry(body, width=40)
- self.ccn.grid(row=1, column=1, sticky=sticky)
- Tkinter.Label(body, text=u"Output file").grid(row=2)
- self.keypath = Tkinter.Entry(body, width=40)
- self.keypath.grid(row=2, column=1, sticky=sticky)
- self.keypath.insert(2, u"bnepubkey.b64")
+ Tkinter.Label(body, text=u"Key file").grid(row=0)
+ self.keypath = Tkinter.Entry(body, width=30)
+ self.keypath.grid(row=0, column=1, sticky=sticky)
+ if os.path.exists(u"bnepubkey.b64"):
+ self.keypath.insert(0, u"bnepubkey.b64")
button = Tkinter.Button(body, text=u"...", command=self.get_keypath)
+ button.grid(row=0, column=2)
+ Tkinter.Label(body, text=u"Input file").grid(row=1)
+ self.inpath = Tkinter.Entry(body, width=30)
+ self.inpath.grid(row=1, column=1, sticky=sticky)
+ button = Tkinter.Button(body, text=u"...", command=self.get_inpath)
+ button.grid(row=1, column=2)
+ Tkinter.Label(body, text=u"Output file").grid(row=2)
+ self.outpath = Tkinter.Entry(body, width=30)
+ self.outpath.grid(row=2, column=1, sticky=sticky)
+ button = Tkinter.Button(body, text=u"...", command=self.get_outpath)
button.grid(row=2, column=2)
buttons = Tkinter.Frame(self)
buttons.pack()
botton = Tkinter.Button(
- buttons, text=u"Generate", width=10, command=self.generate)
+ buttons, text=u"Decrypt", width=10, command=self.decrypt)
botton.pack(side=Tkconstants.LEFT)
Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
button = Tkinter.Button(
@@ -276,8 +379,8 @@ def gui_main():
button.pack(side=Tkconstants.RIGHT)
def get_keypath(self):
- keypath = tkFileDialog.asksaveasfilename(
- parent=None, title=u"Select B&N ePub key file to produce",
+ keypath = tkFileDialog.askopenfilename(
+ parent=None, title=u"Select Barnes & Noble \'.b64\' key file",
defaultextension=u".b64",
filetypes=[('base64-encoded files', '.b64'),
('All Files', '.*')])
@@ -287,37 +390,56 @@ def gui_main():
self.keypath.insert(0, keypath)
return
- def generate(self):
- name = self.name.get()
- ccn = self.ccn.get()
+ def get_inpath(self):
+ inpath = tkFileDialog.askopenfilename(
+ parent=None, title=u"Select B&N-encrypted ePub file to decrypt",
+ defaultextension=u".epub", filetypes=[('ePub files', '.epub')])
+ if inpath:
+ inpath = os.path.normpath(inpath)
+ self.inpath.delete(0, Tkconstants.END)
+ self.inpath.insert(0, inpath)
+ return
+
+ def get_outpath(self):
+ outpath = tkFileDialog.asksaveasfilename(
+ parent=None, title=u"Select unencrypted ePub file to produce",
+ defaultextension=u".epub", filetypes=[('ePub files', '.epub')])
+ if outpath:
+ outpath = os.path.normpath(outpath)
+ self.outpath.delete(0, Tkconstants.END)
+ self.outpath.insert(0, outpath)
+ return
+
+ def decrypt(self):
keypath = self.keypath.get()
- if not name:
- self.status['text'] = u"Name not specified"
+ inpath = self.inpath.get()
+ outpath = self.outpath.get()
+ if not keypath or not os.path.exists(keypath):
+ self.status['text'] = u"Specified key file does not exist"
return
- if not ccn:
- self.status['text'] = u"Credit card number not specified"
+ if not inpath or not os.path.exists(inpath):
+ self.status['text'] = u"Specified input file does not exist"
return
- if not keypath:
- self.status['text'] = u"Output keyfile path not specified"
+ if not outpath:
+ self.status['text'] = u"Output file not specified"
return
- self.status['text'] = u"Generating..."
+ if inpath == outpath:
+ self.status['text'] = u"Must have different input and output files"
+ return
+ userkey = open(keypath,'rb').read()
+ self.status['text'] = u"Decrypting..."
try:
- userkey = generate_key(name, ccn)
+ decrypt_status = decryptBook(userkey, inpath, outpath)
except Exception, e:
- self.status['text'] = u"Error: (0}".format(e.args[0])
+ self.status['text'] = u"Error: {0}".format(e.args[0])
return
- open(keypath,'wb').write(userkey)
- self.status['text'] = u"Keyfile successfully generated"
+ if decrypt_status == 0:
+ self.status['text'] = u"File successfully decrypted"
+ else:
+ self.status['text'] = u"The was an error decrypting the file."
root = Tkinter.Tk()
- if AES is None:
- root.withdraw()
- tkMessageBox.showerror(
- "Ignoble EPUB Keyfile Generator",
- "This script requires OpenSSL or PyCrypto, which must be installed "
- "separately. Read the top-of-script comment for details.")
- return 1
- root.title(u"Barnes & Noble ePub Keyfile Generator v.{0}".format(__version__))
+ root.title(u"Barnes & Noble ePub Decrypter v.{0}".format(__version__))
root.resizable(True, False)
root.minsize(300, 0)
DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1)
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/ignoblekeygen.py b/DeDRM_calibre_plugin/DeDRM_plugin/ignoblekeygen.py
index f8181cb..5118c87 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/ignoblekeygen.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/ignoblekeygen.py
@@ -3,56 +3,47 @@
from __future__ import with_statement
-# ineptepub.pyw, version 6.1
-# Copyright © 2009-2010 by i♥cabbages
+# ignoblekeygen.pyw, version 2.5
+# Copyright © 2009-2010 i♥cabbages
# Released under the terms of the GNU General Public Licence, version 3
# <http://www.gnu.org/licenses/>
# Modified 2010–2013 by some_updates, DiapDealer and Apprentice Alf
-# Windows users: Before running this program, you must first install Python 2.6
-# from <http://www.python.org/download/> and PyCrypto from
-# <http://www.voidspace.org.uk/python/modules.shtml#pycrypto> (make sure to
-# install the version for Python 2.6). Save this script file as
-# ineptepub.pyw and double-click on it to run it.
+# Windows users: Before running this program, you must first install Python.
+# We recommend ActiveState Python 2.7.X for Windows (x86) from
+# http://www.activestate.com/activepython/downloads.
+# You must also install PyCrypto from
+# http://www.voidspace.org.uk/python/modules.shtml#pycrypto
+# (make certain to install the version for Python 2.7).
+# Then save this script file as ignoblekeygen.pyw and double-click on it to run it.
#
-# Mac OS X users: Save this script file as ineptepub.pyw. You can run this
-# program from the command line (pythonw ineptepub.pyw) or by double-clicking
+# Mac OS X users: Save this script file as ignoblekeygen.pyw. You can run this
+# program from the command line (python ignoblekeygen.pyw) or by double-clicking
# it when it has been associated with PythonLauncher.
# Revision history:
# 1 - Initial release
-# 2 - Rename to INEPT, fix exit code
-# 5 - Version bump to avoid (?) confusion;
-# Improve OS X support by using OpenSSL when available
-# 5.1 - Improve OpenSSL error checking
-# 5.2 - Fix ctypes error causing segfaults on some systems
-# 5.3 - add support for OpenSSL on Windows, fix bug with some versions of libcrypto 0.9.8 prior to path level o
-# 5.4 - add support for encoding to 'utf-8' when building up list of files to decrypt from encryption.xml
-# 5.5 - On Windows try PyCrypto first, OpenSSL next
-# 5.6 - Modify interface to allow use with import
-# 5.7 - Fix for potential problem with PyCrypto
-# 5.8 - Revised to allow use in calibre plugins to eliminate need for duplicate code
-# 5.9 - Fixed to retain zip file metadata (e.g. file modification date)
-# 6.0 - moved unicode_argv call inside main for Windows DeDRM compatibility
-# 6.1 - Work if TkInter is missing
+# 2 - Add OS X support by using OpenSSL when available (taken/modified from ineptepub v5)
+# 2.1 - Allow Windows versions of libcrypto to be found
+# 2.2 - On Windows try PyCrypto first and then OpenSSL next
+# 2.3 - Modify interface to allow use of import
+# 2.4 - Improvements to UI and now works in plugins
+# 2.5 - Additional improvement for unicode and plugin support
+# 2.6 - moved unicode_argv call inside main for Windows DeDRM compatibility
+# 2.7 - Work if TkInter is missing
"""
-Decrypt Adobe Digital Editions encrypted ePub books.
+Generate Barnes & Noble EPUB user key from name and credit card number.
"""
__license__ = 'GPL v3'
-__version__ = "6.1"
+__version__ = "2.7"
import sys
import os
-import traceback
-import zlib
-import zipfile
-from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED
-from contextlib import closing
-import xml.etree.ElementTree as etree
+import hashlib
# Wrap a stream so that output gets flushed immediately
# and also make sure that any unicode strings get
@@ -84,8 +75,8 @@ def unicode_argv():
# Versions 2.x of Python don't support Unicode in sys.argv on
# Windows, with the underlying Windows API instead replacing multi-byte
- # characters with '?'.
-
+ # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv
+ # as a list of Unicode strings and encode them as utf-8
from ctypes import POINTER, byref, cdll, c_int, windll
from ctypes.wintypes import LPCWSTR, LPWSTR
@@ -106,7 +97,9 @@ def unicode_argv():
start = argc.value - len(sys.argv)
return [argv[i] for i in
xrange(start, argc.value)]
- return [u"ineptepub.py"]
+ # if we don't have any arguments at all, just pass back script name
+ # this should never happen
+ return [u"ignoblekeygen.py"]
else:
argvencoding = sys.stdin.encoding
if argvencoding == None:
@@ -114,7 +107,7 @@ def unicode_argv():
return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
-class ADEPTError(Exception):
+class IGNOBLEError(Exception):
pass
def _load_crypto_libcrypto():
@@ -128,19 +121,14 @@ def _load_crypto_libcrypto():
libcrypto = find_library('crypto')
if libcrypto is None:
- raise ADEPTError('libcrypto not found')
+ raise IGNOBLEError('libcrypto not found')
libcrypto = CDLL(libcrypto)
- RSA_NO_PADDING = 3
AES_MAXNR = 14
c_char_pp = POINTER(c_char_p)
c_int_p = POINTER(c_int)
- class RSA(Structure):
- pass
- RSA_p = POINTER(RSA)
-
class AES_KEY(Structure):
_fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))),
('rounds', c_int)]
@@ -152,312 +140,80 @@ def _load_crypto_libcrypto():
func.argtypes = argtypes
return func
- d2i_RSAPrivateKey = F(RSA_p, 'd2i_RSAPrivateKey',
- [RSA_p, c_char_pp, c_long])
- RSA_size = F(c_int, 'RSA_size', [RSA_p])
- RSA_private_decrypt = F(c_int, 'RSA_private_decrypt',
- [c_int, c_char_p, c_char_p, RSA_p, c_int])
- RSA_free = F(None, 'RSA_free', [RSA_p])
- AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',
+ AES_set_encrypt_key = F(c_int, 'AES_set_encrypt_key',
[c_char_p, c_int, AES_KEY_p])
AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',
[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,
c_int])
- class RSA(object):
- def __init__(self, der):
- buf = create_string_buffer(der)
- pp = c_char_pp(cast(buf, c_char_p))
- rsa = self._rsa = d2i_RSAPrivateKey(None, pp, len(der))
- if rsa is None:
- raise ADEPTError('Error parsing ADEPT user key DER')
-
- def decrypt(self, from_):
- rsa = self._rsa
- to = create_string_buffer(RSA_size(rsa))
- dlen = RSA_private_decrypt(len(from_), from_, to, rsa,
- RSA_NO_PADDING)
- if dlen < 0:
- raise ADEPTError('RSA decryption failed')
- return to[:dlen]
-
- def __del__(self):
- if self._rsa is not None:
- RSA_free(self._rsa)
- self._rsa = None
-
class AES(object):
- def __init__(self, userkey):
+ def __init__(self, userkey, iv):
self._blocksize = len(userkey)
- if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
- raise ADEPTError('AES improper key used')
- return
+ self._iv = iv
key = self._key = AES_KEY()
- rv = AES_set_decrypt_key(userkey, len(userkey) * 8, key)
+ rv = AES_set_encrypt_key(userkey, len(userkey) * 8, key)
if rv < 0:
- raise ADEPTError('Failed to initialize AES key')
+ raise IGNOBLEError('Failed to initialize AES Encrypt key')
- def decrypt(self, data):
+ def encrypt(self, data):
out = create_string_buffer(len(data))
- iv = ("\x00" * self._blocksize)
- rv = AES_cbc_encrypt(data, out, len(data), self._key, iv, 0)
+ rv = AES_cbc_encrypt(data, out, len(data), self._key, self._iv, 1)
if rv == 0:
- raise ADEPTError('AES decryption failed')
+ raise IGNOBLEError('AES encryption failed')
return out.raw
- return (AES, RSA)
+ return AES
def _load_crypto_pycrypto():
from Crypto.Cipher import AES as _AES
- from Crypto.PublicKey import RSA as _RSA
-
- # ASN.1 parsing code from tlslite
- class ASN1Error(Exception):
- pass
-
- class ASN1Parser(object):
- class Parser(object):
- def __init__(self, bytes):
- self.bytes = bytes
- self.index = 0
-
- def get(self, length):
- if self.index + length > len(self.bytes):
- raise ASN1Error("Error decoding ASN.1")
- x = 0
- for count in range(length):
- x <<= 8
- x |= self.bytes[self.index]
- self.index += 1
- return x
-
- def getFixBytes(self, lengthBytes):
- bytes = self.bytes[self.index : self.index+lengthBytes]
- self.index += lengthBytes
- return bytes
-
- def getVarBytes(self, lengthLength):
- lengthBytes = self.get(lengthLength)
- return self.getFixBytes(lengthBytes)
-
- def getFixList(self, length, lengthList):
- l = [0] * lengthList
- for x in range(lengthList):
- l[x] = self.get(length)
- return l
-
- def getVarList(self, length, lengthLength):
- lengthList = self.get(lengthLength)
- if lengthList % length != 0:
- raise ASN1Error("Error decoding ASN.1")
- lengthList = int(lengthList/length)
- l = [0] * lengthList
- for x in range(lengthList):
- l[x] = self.get(length)
- return l
-
- def startLengthCheck(self, lengthLength):
- self.lengthCheck = self.get(lengthLength)
- self.indexCheck = self.index
-
- def setLengthCheck(self, length):
- self.lengthCheck = length
- self.indexCheck = self.index
-
- def stopLengthCheck(self):
- if (self.index - self.indexCheck) != self.lengthCheck:
- raise ASN1Error("Error decoding ASN.1")
-
- def atLengthCheck(self):
- if (self.index - self.indexCheck) < self.lengthCheck:
- return False
- elif (self.index - self.indexCheck) == self.lengthCheck:
- return True
- else:
- raise ASN1Error("Error decoding ASN.1")
-
- def __init__(self, bytes):
- p = self.Parser(bytes)
- p.get(1)
- self.length = self._getASN1Length(p)
- self.value = p.getFixBytes(self.length)
-
- def getChild(self, which):
- p = self.Parser(self.value)
- for x in range(which+1):
- markIndex = p.index
- p.get(1)
- length = self._getASN1Length(p)
- p.getFixBytes(length)
- return ASN1Parser(p.bytes[markIndex:p.index])
-
- def _getASN1Length(self, p):
- firstLength = p.get(1)
- if firstLength<=127:
- return firstLength
- else:
- lengthLength = firstLength & 0x7F
- return p.get(lengthLength)
class AES(object):
- def __init__(self, key):
- self._aes = _AES.new(key, _AES.MODE_CBC, '\x00'*16)
+ def __init__(self, key, iv):
+ self._aes = _AES.new(key, _AES.MODE_CBC, iv)
- def decrypt(self, data):
- return self._aes.decrypt(data)
+ def encrypt(self, data):
+ return self._aes.encrypt(data)
- class RSA(object):
- def __init__(self, der):
- key = ASN1Parser([ord(x) for x in der])
- key = [key.getChild(x).value for x in xrange(1, 4)]
- key = [self.bytesToNumber(v) for v in key]
- self._rsa = _RSA.construct(key)
-
- def bytesToNumber(self, bytes):
- total = 0L
- for byte in bytes:
- total = (total << 8) + byte
- return total
-
- def decrypt(self, data):
- return self._rsa.decrypt(data)
-
- return (AES, RSA)
+ return AES
def _load_crypto():
- AES = RSA = None
+ AES = None
cryptolist = (_load_crypto_libcrypto, _load_crypto_pycrypto)
if sys.platform.startswith('win'):
cryptolist = (_load_crypto_pycrypto, _load_crypto_libcrypto)
for loader in cryptolist:
try:
- AES, RSA = loader()
+ AES = loader()
break
- except (ImportError, ADEPTError):
+ except (ImportError, IGNOBLEError):
pass
- return (AES, RSA)
-
-AES, RSA = _load_crypto()
-
-META_NAMES = ('mimetype', 'META-INF/rights.xml', 'META-INF/encryption.xml')
-NSMAP = {'adept': 'http://ns.adobe.com/adept',
- 'enc': 'http://www.w3.org/2001/04/xmlenc#'}
-
-class Decryptor(object):
- def __init__(self, bookkey, encryption):
- enc = lambda tag: '{%s}%s' % (NSMAP['enc'], tag)
- self._aes = AES(bookkey)
- encryption = etree.fromstring(encryption)
- self._encrypted = encrypted = set()
- expr = './%s/%s/%s' % (enc('EncryptedData'), enc('CipherData'),
- enc('CipherReference'))
- for elem in encryption.findall(expr):
- path = elem.get('URI', None)
- if path is not None:
- path = path.encode('utf-8')
- encrypted.add(path)
-
- def decompress(self, bytes):
- dc = zlib.decompressobj(-15)
- bytes = dc.decompress(bytes)
- ex = dc.decompress('Z') + dc.flush()
- if ex:
- bytes = bytes + ex
- return bytes
-
- def decrypt(self, path, data):
- if path in self._encrypted:
- data = self._aes.decrypt(data)[16:]
- data = data[:-ord(data[-1])]
- data = self.decompress(data)
- return data
-
-# check file to make check whether it's probably an Adobe Adept encrypted ePub
-def adeptBook(inpath):
- with closing(ZipFile(open(inpath, 'rb'))) as inf:
- namelist = set(inf.namelist())
- if 'META-INF/rights.xml' not in namelist or \
- 'META-INF/encryption.xml' not in namelist:
- return False
- try:
- rights = etree.fromstring(inf.read('META-INF/rights.xml'))
- adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
- expr = './/%s' % (adept('encryptedKey'),)
- bookkey = ''.join(rights.findtext(expr))
- if len(bookkey) == 172:
- return True
- except:
- # if we couldn't check, assume it is
- return True
- return False
-
-def decryptBook(userkey, inpath, outpath):
- if AES is None:
- raise ADEPTError(u"PyCrypto or OpenSSL must be installed.")
- rsa = RSA(userkey)
- with closing(ZipFile(open(inpath, 'rb'))) as inf:
- namelist = set(inf.namelist())
- if 'META-INF/rights.xml' not in namelist or \
- 'META-INF/encryption.xml' not in namelist:
- print u"{0:s} is DRM-free.".format(os.path.basename(inpath))
- return 1
- for name in META_NAMES:
- namelist.remove(name)
- try:
- rights = etree.fromstring(inf.read('META-INF/rights.xml'))
- adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
- expr = './/%s' % (adept('encryptedKey'),)
- bookkey = ''.join(rights.findtext(expr))
- if len(bookkey) != 172:
- print u"{0:s} is not a secure Adobe Adept ePub.".format(os.path.basename(inpath))
- return 1
- bookkey = rsa.decrypt(bookkey.decode('base64'))
- # Padded as per RSAES-PKCS1-v1_5
- if bookkey[-17] != '\x00':
- print u"Could not decrypt {0:s}. Wrong key".format(os.path.basename(inpath))
- return 2
- encryption = inf.read('META-INF/encryption.xml')
- decryptor = Decryptor(bookkey[-16:], encryption)
- kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
- with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf:
- zi = ZipInfo('mimetype')
- zi.compress_type=ZIP_STORED
- try:
- # if the mimetype is present, get its info, including time-stamp
- oldzi = inf.getinfo('mimetype')
- # copy across fields to be preserved
- zi.date_time = oldzi.date_time
- zi.comment = oldzi.comment
- zi.extra = oldzi.extra
- zi.internal_attr = oldzi.internal_attr
- # external attributes are dependent on the create system, so copy both.
- zi.external_attr = oldzi.external_attr
- zi.create_system = oldzi.create_system
- except:
- pass
- outf.writestr(zi, inf.read('mimetype'))
- for path in namelist:
- data = inf.read(path)
- zi = ZipInfo(path)
- zi.compress_type=ZIP_DEFLATED
- try:
- # get the file info, including time-stamp
- oldzi = inf.getinfo(path)
- # copy across useful fields
- zi.date_time = oldzi.date_time
- zi.comment = oldzi.comment
- zi.extra = oldzi.extra
- zi.internal_attr = oldzi.internal_attr
- # external attributes are dependent on the create system, so copy both.
- zi.external_attr = oldzi.external_attr
- zi.create_system = oldzi.create_system
- except:
- pass
- outf.writestr(zi, decryptor.decrypt(path, data))
- except:
- print u"Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc())
- return 2
- return 0
+ return AES
+
+AES = _load_crypto()
+
+def normalize_name(name):
+ return ''.join(x for x in name.lower() if x != ' ')
+
+
+def generate_key(name, ccn):
+ # remove spaces and case from name and CC numbers.
+ if type(name)==unicode:
+ name = name.encode('utf-8')
+ if type(ccn)==unicode:
+ ccn = ccn.encode('utf-8')
+
+ name = normalize_name(name) + '\x00'
+ ccn = normalize_name(ccn) + '\x00'
+
+ name_sha = hashlib.sha1(name).digest()[:16]
+ ccn_sha = hashlib.sha1(ccn).digest()[:16]
+ both_sha = hashlib.sha1(name + ccn).digest()
+ aes = AES(ccn_sha, name_sha)
+ crypt = aes.encrypt(both_sha + ('\x0c' * 0x0c))
+ userkey = hashlib.sha1(crypt).digest()
+ return userkey.encode('base64')
+
+
def cli_main():
@@ -465,15 +221,19 @@ def cli_main():
sys.stderr=SafeUnbuffered(sys.stderr)
argv=unicode_argv()
progname = os.path.basename(argv[0])
+ if AES is None:
+ print "%s: This script requires OpenSSL or PyCrypto, which must be installed " \
+ "separately. Read the top-of-script comment for details." % \
+ (progname,)
+ return 1
if len(argv) != 4:
- print u"usage: {0} <keyfile.der> <inbook.epub> <outbook.epub>".format(progname)
+ print u"usage: {0} <Name> <CC#> <keyfileout.b64>".format(progname)
return 1
- keypath, inpath, outpath = argv[1:]
- userkey = open(keypath,'rb').read()
- result = decryptBook(userkey, inpath, outpath)
- if result == 0:
- print u"Successfully decrypted {0:s} as {1:s}".format(os.path.basename(inpath),os.path.basename(outpath))
- return result
+ name, ccn, keypath = argv[1:]
+ userkey = generate_key(name, ccn)
+ open(keypath,'wb').write(userkey)
+ return 0
+
def gui_main():
try:
@@ -487,33 +247,28 @@ def gui_main():
class DecryptionDialog(Tkinter.Frame):
def __init__(self, root):
Tkinter.Frame.__init__(self, root, border=5)
- self.status = Tkinter.Label(self, text=u"Select files for decryption")
+ self.status = Tkinter.Label(self, text=u"Enter parameters")
self.status.pack(fill=Tkconstants.X, expand=1)
body = Tkinter.Frame(self)
body.pack(fill=Tkconstants.X, expand=1)
sticky = Tkconstants.E + Tkconstants.W
body.grid_columnconfigure(1, weight=2)
- Tkinter.Label(body, text=u"Key file").grid(row=0)
- self.keypath = Tkinter.Entry(body, width=30)
- self.keypath.grid(row=0, column=1, sticky=sticky)
- if os.path.exists(u"adeptkey.der"):
- self.keypath.insert(0, u"adeptkey.der")
- button = Tkinter.Button(body, text=u"...", command=self.get_keypath)
- button.grid(row=0, column=2)
- Tkinter.Label(body, text=u"Input file").grid(row=1)
- self.inpath = Tkinter.Entry(body, width=30)
- self.inpath.grid(row=1, column=1, sticky=sticky)
- button = Tkinter.Button(body, text=u"...", command=self.get_inpath)
- button.grid(row=1, column=2)
+ Tkinter.Label(body, text=u"Account Name").grid(row=0)
+ self.name = Tkinter.Entry(body, width=40)
+ self.name.grid(row=0, column=1, sticky=sticky)
+ Tkinter.Label(body, text=u"CC#").grid(row=1)
+ self.ccn = Tkinter.Entry(body, width=40)
+ self.ccn.grid(row=1, column=1, sticky=sticky)
Tkinter.Label(body, text=u"Output file").grid(row=2)
- self.outpath = Tkinter.Entry(body, width=30)
- self.outpath.grid(row=2, column=1, sticky=sticky)
- button = Tkinter.Button(body, text=u"...", command=self.get_outpath)
+ self.keypath = Tkinter.Entry(body, width=40)
+ self.keypath.grid(row=2, column=1, sticky=sticky)
+ self.keypath.insert(2, u"bnepubkey.b64")
+ button = Tkinter.Button(body, text=u"...", command=self.get_keypath)
button.grid(row=2, column=2)
buttons = Tkinter.Frame(self)
buttons.pack()
botton = Tkinter.Button(
- buttons, text=u"Decrypt", width=10, command=self.decrypt)
+ buttons, text=u"Generate", width=10, command=self.generate)
botton.pack(side=Tkconstants.LEFT)
Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
button = Tkinter.Button(
@@ -521,10 +276,10 @@ def gui_main():
button.pack(side=Tkconstants.RIGHT)
def get_keypath(self):
- keypath = tkFileDialog.askopenfilename(
- parent=None, title=u"Select Adobe Adept \'.der\' key file",
- defaultextension=u".der",
- filetypes=[('Adobe Adept DER-encoded files', '.der'),
+ keypath = tkFileDialog.asksaveasfilename(
+ parent=None, title=u"Select B&N ePub key file to produce",
+ defaultextension=u".b64",
+ filetypes=[('base64-encoded files', '.b64'),
('All Files', '.*')])
if keypath:
keypath = os.path.normpath(keypath)
@@ -532,56 +287,37 @@ def gui_main():
self.keypath.insert(0, keypath)
return
- def get_inpath(self):
- inpath = tkFileDialog.askopenfilename(
- parent=None, title=u"Select ADEPT-encrypted ePub file to decrypt",
- defaultextension=u".epub", filetypes=[('ePub files', '.epub')])
- if inpath:
- inpath = os.path.normpath(inpath)
- self.inpath.delete(0, Tkconstants.END)
- self.inpath.insert(0, inpath)
- return
-
- def get_outpath(self):
- outpath = tkFileDialog.asksaveasfilename(
- parent=None, title=u"Select unencrypted ePub file to produce",
- defaultextension=u".epub", filetypes=[('ePub files', '.epub')])
- if outpath:
- outpath = os.path.normpath(outpath)
- self.outpath.delete(0, Tkconstants.END)
- self.outpath.insert(0, outpath)
- return
-
- def decrypt(self):
+ def generate(self):
+ name = self.name.get()
+ ccn = self.ccn.get()
keypath = self.keypath.get()
- inpath = self.inpath.get()
- outpath = self.outpath.get()
- if not keypath or not os.path.exists(keypath):
- self.status['text'] = u"Specified key file does not exist"
- return
- if not inpath or not os.path.exists(inpath):
- self.status['text'] = u"Specified input file does not exist"
+ if not name:
+ self.status['text'] = u"Name not specified"
return
- if not outpath:
- self.status['text'] = u"Output file not specified"
+ if not ccn:
+ self.status['text'] = u"Credit card number not specified"
return
- if inpath == outpath:
- self.status['text'] = u"Must have different input and output files"
+ if not keypath:
+ self.status['text'] = u"Output keyfile path not specified"
return
- userkey = open(keypath,'rb').read()
- self.status['text'] = u"Decrypting..."
+ self.status['text'] = u"Generating..."
try:
- decrypt_status = decryptBook(userkey, inpath, outpath)
+ userkey = generate_key(name, ccn)
except Exception, e:
- self.status['text'] = u"Error: {0}".format(e.args[0])
+ self.status['text'] = u"Error: {0}".format(e.args[0])
return
- if decrypt_status == 0:
- self.status['text'] = u"File successfully decrypted"
- else:
- self.status['text'] = u"The was an error decrypting the file."
+ open(keypath,'wb').write(userkey)
+ self.status['text'] = u"Keyfile successfully generated"
root = Tkinter.Tk()
- root.title(u"Adobe Adept ePub Decrypter v.{0}".format(__version__))
+ if AES is None:
+ root.withdraw()
+ tkMessageBox.showerror(
+ "Ignoble EPUB Keyfile Generator",
+ "This script requires OpenSSL or PyCrypto, which must be installed "
+ "separately. Read the top-of-script comment for details.")
+ return 1
+ root.title(u"Barnes & Noble ePub Keyfile Generator v.{0}".format(__version__))
root.resizable(True, False)
root.minsize(300, 0)
DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1)
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/ineptepub.py b/DeDRM_calibre_plugin/DeDRM_plugin/ineptepub.py
index 1986e20..f8181cb 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/ineptepub.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/ineptepub.py
@@ -1,73 +1,57 @@
-#! /usr/bin/python
+#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import with_statement
-# ineptpdf.pyw, version 7.11
+# ineptepub.pyw, version 6.1
# Copyright © 2009-2010 by i♥cabbages
# Released under the terms of the GNU General Public Licence, version 3
# <http://www.gnu.org/licenses/>
-# Modified 2010–2012 by some_updates, DiapDealer and Apprentice Alf
+# Modified 2010–2013 by some_updates, DiapDealer and Apprentice Alf
# Windows users: Before running this program, you must first install Python 2.6
# from <http://www.python.org/download/> and PyCrypto from
# <http://www.voidspace.org.uk/python/modules.shtml#pycrypto> (make sure to
# install the version for Python 2.6). Save this script file as
-# ineptpdf.pyw and double-click on it to run it.
+# ineptepub.pyw and double-click on it to run it.
#
-# Mac OS X users: Save this script file as ineptpdf.pyw. You can run this
-# program from the command line (pythonw ineptpdf.pyw) or by double-clicking
+# Mac OS X users: Save this script file as ineptepub.pyw. You can run this
+# program from the command line (pythonw ineptepub.pyw) or by double-clicking
# it when it has been associated with PythonLauncher.
# Revision history:
# 1 - Initial release
-# 2 - Improved determination of key-generation algorithm
-# 3 - Correctly handle PDF >=1.5 cross-reference streams
-# 4 - Removal of ciando's personal ID
-# 5 - Automated decryption of a complete directory
-# 6.1 - backward compatibility for 1.7.1 and old adeptkey.der
-# 7 - Get cross reference streams and object streams working for input.
-# Not yet supported on output but this only effects file size,
-# not functionality. (anon2)
-# 7.1 - Correct a problem when an old trailer is not followed by startxref
-# 7.2 - Correct malformed Mac OS resource forks for Stanza (anon2)
-# - Support for cross ref streams on output (decreases file size)
-# 7.3 - Correct bug in trailer with cross ref stream that caused the error
-# "The root object is missing or invalid" in Adobe Reader. (anon2)
-# 7.4 - Force all generation numbers in output file to be 0, like in v6.
-# Fallback code for wrong xref improved (search till last trailer
-# instead of first) (anon2)
-# 7.5 - allow support for OpenSSL to replace pycrypto on all platforms
-# implemented ARC4 interface to OpenSSL
-# fixed minor typos
-# 7.6 - backported AES and other fixes from version 8.4.48
-# 7.7 - On Windows try PyCrypto first and OpenSSL next
-# 7.8 - Modify interface to allow use of import
-# 7.9 - Bug fix for some session key errors when len(bookkey) > length required
-# 7.10 - Various tweaks to fix minor problems.
-# 7.11 - More tweaks to fix minor problems.
-# 7.12 - Revised to allow use in calibre plugins to eliminate need for duplicate code
-# 7.13 - Fixed erroneous mentions of ineptepub
-# 7.14 - moved unicode_argv call inside main for Windows DeDRM compatibility
-# 8.0 - Work if TkInter is missing
-# 8.0.1 - Broken Metadata fix.
+# 2 - Rename to INEPT, fix exit code
+# 5 - Version bump to avoid (?) confusion;
+# Improve OS X support by using OpenSSL when available
+# 5.1 - Improve OpenSSL error checking
+# 5.2 - Fix ctypes error causing segfaults on some systems
+# 5.3 - add support for OpenSSL on Windows, fix bug with some versions of libcrypto 0.9.8 prior to path level o
+# 5.4 - add support for encoding to 'utf-8' when building up list of files to decrypt from encryption.xml
+# 5.5 - On Windows try PyCrypto first, OpenSSL next
+# 5.6 - Modify interface to allow use with import
+# 5.7 - Fix for potential problem with PyCrypto
+# 5.8 - Revised to allow use in calibre plugins to eliminate need for duplicate code
+# 5.9 - Fixed to retain zip file metadata (e.g. file modification date)
+# 6.0 - moved unicode_argv call inside main for Windows DeDRM compatibility
+# 6.1 - Work if TkInter is missing
"""
-Decrypts Adobe ADEPT-encrypted PDF files.
+Decrypt Adobe Digital Editions encrypted ePub books.
"""
__license__ = 'GPL v3'
-__version__ = "8.0.1"
+__version__ = "6.1"
import sys
import os
-import re
+import traceback
import zlib
-import struct
-import hashlib
-from itertools import chain, islice
+import zipfile
+from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED
+from contextlib import closing
import xml.etree.ElementTree as etree
# Wrap a stream so that output gets flushed immediately
@@ -87,8 +71,11 @@ class SafeUnbuffered:
def __getattr__(self, attr):
return getattr(self.stream, attr)
-iswindows = sys.platform.startswith('win')
-isosx = sys.platform.startswith('darwin')
+try:
+ from calibre.constants import iswindows, isosx
+except:
+ iswindows = sys.platform.startswith('win')
+ isosx = sys.platform.startswith('darwin')
def unicode_argv():
if iswindows:
@@ -119,7 +106,7 @@ def unicode_argv():
start = argc.value - len(sys.argv)
return [argv[i] for i in
xrange(start, argc.value)]
- return [u"ineptpdf.py"]
+ return [u"ineptepub.py"]
else:
argvencoding = sys.stdin.encoding
if argvencoding == None:
@@ -130,21 +117,12 @@ def unicode_argv():
class ADEPTError(Exception):
pass
-
-import hashlib
-
-def SHA256(message):
- ctx = hashlib.sha256()
- ctx.update(message)
- return ctx.digest()
-
-
def _load_crypto_libcrypto():
from ctypes import CDLL, POINTER, c_void_p, c_char_p, c_int, c_long, \
Structure, c_ulong, create_string_buffer, cast
from ctypes.util import find_library
- if sys.platform.startswith('win'):
+ if iswindows:
libcrypto = find_library('libeay32')
else:
libcrypto = find_library('crypto')
@@ -153,43 +131,38 @@ def _load_crypto_libcrypto():
raise ADEPTError('libcrypto not found')
libcrypto = CDLL(libcrypto)
- AES_MAXNR = 14
-
RSA_NO_PADDING = 3
+ AES_MAXNR = 14
c_char_pp = POINTER(c_char_p)
c_int_p = POINTER(c_int)
- class AES_KEY(Structure):
- _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
- AES_KEY_p = POINTER(AES_KEY)
-
- class RC4_KEY(Structure):
- _fields_ = [('x', c_int), ('y', c_int), ('box', c_int * 256)]
- RC4_KEY_p = POINTER(RC4_KEY)
-
class RSA(Structure):
pass
RSA_p = POINTER(RSA)
+ class AES_KEY(Structure):
+ _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))),
+ ('rounds', c_int)]
+ AES_KEY_p = POINTER(AES_KEY)
+
def F(restype, name, argtypes):
func = getattr(libcrypto, name)
func.restype = restype
func.argtypes = argtypes
return func
- AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])
- AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])
-
- RC4_set_key = F(None,'RC4_set_key',[RC4_KEY_p, c_int, c_char_p])
- RC4_crypt = F(None,'RC4',[RC4_KEY_p, c_int, c_char_p, c_char_p])
-
d2i_RSAPrivateKey = F(RSA_p, 'd2i_RSAPrivateKey',
[RSA_p, c_char_pp, c_long])
RSA_size = F(c_int, 'RSA_size', [RSA_p])
RSA_private_decrypt = F(c_int, 'RSA_private_decrypt',
[c_int, c_char_p, c_char_p, RSA_p, c_int])
RSA_free = F(None, 'RSA_free', [RSA_p])
+ AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',
+ [c_char_p, c_int, AES_KEY_p])
+ AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',
+ [c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,
+ c_int])
class RSA(object):
def __init__(self, der):
@@ -206,65 +179,37 @@ def _load_crypto_libcrypto():
RSA_NO_PADDING)
if dlen < 0:
raise ADEPTError('RSA decryption failed')
- return to[1:dlen]
+ return to[:dlen]
def __del__(self):
if self._rsa is not None:
RSA_free(self._rsa)
self._rsa = None
- class ARC4(object):
- @classmethod
- def new(cls, userkey):
- self = ARC4()
- self._blocksize = len(userkey)
- key = self._key = RC4_KEY()
- RC4_set_key(key, self._blocksize, userkey)
- return self
- def __init__(self):
- self._blocksize = 0
- self._key = None
- def decrypt(self, data):
- out = create_string_buffer(len(data))
- RC4_crypt(self._key, len(data), data, out)
- return out.raw
-
class AES(object):
- MODE_CBC = 0
- @classmethod
- def new(cls, userkey, mode, iv):
- self = AES()
+ def __init__(self, userkey):
self._blocksize = len(userkey)
- # mode is ignored since CBCMODE is only thing supported/used so far
- self._mode = mode
if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
raise ADEPTError('AES improper key used')
return
- keyctx = self._keyctx = AES_KEY()
- self._iv = iv
- rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
+ key = self._key = AES_KEY()
+ rv = AES_set_decrypt_key(userkey, len(userkey) * 8, key)
if rv < 0:
raise ADEPTError('Failed to initialize AES key')
- return self
- def __init__(self):
- self._blocksize = 0
- self._keyctx = None
- self._iv = 0
- self._mode = 0
+
def decrypt(self, data):
out = create_string_buffer(len(data))
- rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, self._iv, 0)
+ iv = ("\x00" * self._blocksize)
+ rv = AES_cbc_encrypt(data, out, len(data), self._key, iv, 0)
if rv == 0:
raise ADEPTError('AES decryption failed')
return out.raw
- return (ARC4, RSA, AES)
-
+ return (AES, RSA)
def _load_crypto_pycrypto():
- from Crypto.PublicKey import RSA as _RSA
- from Crypto.Cipher import ARC4 as _ARC4
from Crypto.Cipher import AES as _AES
+ from Crypto.PublicKey import RSA as _RSA
# ASN.1 parsing code from tlslite
class ASN1Error(Exception):
@@ -354,26 +299,10 @@ def _load_crypto_pycrypto():
lengthLength = firstLength & 0x7F
return p.get(lengthLength)
- class ARC4(object):
- @classmethod
- def new(cls, userkey):
- self = ARC4()
- self._arc4 = _ARC4.new(userkey)
- return self
- def __init__(self):
- self._arc4 = None
- def decrypt(self, data):
- return self._arc4.decrypt(data)
-
class AES(object):
- MODE_CBC = _AES.MODE_CBC
- @classmethod
- def new(cls, userkey, mode, iv):
- self = AES()
- self._aes = _AES.new(userkey, mode, iv)
- return self
- def __init__(self):
- self._aes = None
+ def __init__(self, key):
+ self._aes = _AES.new(key, _AES.MODE_CBC, '\x00'*16)
+
def decrypt(self, data):
return self._aes.decrypt(data)
@@ -393,1801 +322,141 @@ def _load_crypto_pycrypto():
def decrypt(self, data):
return self._rsa.decrypt(data)
- return (ARC4, RSA, AES)
+ return (AES, RSA)
def _load_crypto():
- ARC4 = RSA = AES = None
+ AES = RSA = None
cryptolist = (_load_crypto_libcrypto, _load_crypto_pycrypto)
if sys.platform.startswith('win'):
cryptolist = (_load_crypto_pycrypto, _load_crypto_libcrypto)
for loader in cryptolist:
try:
- ARC4, RSA, AES = loader()
+ AES, RSA = loader()
break
except (ImportError, ADEPTError):
pass
- return (ARC4, RSA, AES)
-ARC4, RSA, AES = _load_crypto()
-
-
-try:
- from cStringIO import StringIO
-except ImportError:
- from StringIO import StringIO
-
-
-# Do we generate cross reference streams on output?
-# 0 = never
-# 1 = only if present in input
-# 2 = always
-
-GEN_XREF_STM = 1
-
-# This is the value for the current document
-gen_xref_stm = False # will be set in PDFSerializer
-
-# PDF parsing routines from pdfminer, with changes for EBX_HANDLER
-
-# Utilities
-
-def choplist(n, seq):
- '''Groups every n elements of the list.'''
- r = []
- for x in seq:
- r.append(x)
- if len(r) == n:
- yield tuple(r)
- r = []
- return
-
-def nunpack(s, default=0):
- '''Unpacks up to 4 bytes big endian.'''
- l = len(s)
- if not l:
- return default
- elif l == 1:
- return ord(s)
- elif l == 2:
- return struct.unpack('>H', s)[0]
- elif l == 3:
- return struct.unpack('>L', '\x00'+s)[0]
- elif l == 4:
- return struct.unpack('>L', s)[0]
- else:
- return TypeError('invalid length: %d' % l)
-
-
-STRICT = 0
-
-
-# PS Exceptions
-
-class PSException(Exception): pass
-class PSEOF(PSException): pass
-class PSSyntaxError(PSException): pass
-class PSTypeError(PSException): pass
-class PSValueError(PSException): pass
-
-
-# Basic PostScript Types
-
-
-# PSLiteral
-class PSObject(object): pass
-
-class PSLiteral(PSObject):
- '''
- PS literals (e.g. "/Name").
- Caution: Never create these objects directly.
- Use PSLiteralTable.intern() instead.
- '''
- def __init__(self, name):
- self.name = name
- return
-
- def __repr__(self):
- name = []
- for char in self.name:
- if not char.isalnum():
- char = '#%02x' % ord(char)
- name.append(char)
- return '/%s' % ''.join(name)
-
-# PSKeyword
-class PSKeyword(PSObject):
- '''
- PS keywords (e.g. "showpage").
- Caution: Never create these objects directly.
- Use PSKeywordTable.intern() instead.
- '''
- def __init__(self, name):
- self.name = name
- return
-
- def __repr__(self):
- return self.name
-
-# PSSymbolTable
-class PSSymbolTable(object):
-
- '''
- Symbol table that stores PSLiteral or PSKeyword.
- '''
-
- def __init__(self, classe):
- self.dic = {}
- self.classe = classe
- return
-
- def intern(self, name):
- if name in self.dic:
- lit = self.dic[name]
- else:
- lit = self.classe(name)
- self.dic[name] = lit
- return lit
-
-PSLiteralTable = PSSymbolTable(PSLiteral)
-PSKeywordTable = PSSymbolTable(PSKeyword)
-LIT = PSLiteralTable.intern
-KWD = PSKeywordTable.intern
-KEYWORD_BRACE_BEGIN = KWD('{')
-KEYWORD_BRACE_END = KWD('}')
-KEYWORD_ARRAY_BEGIN = KWD('[')
-KEYWORD_ARRAY_END = KWD(']')
-KEYWORD_DICT_BEGIN = KWD('<<')
-KEYWORD_DICT_END = KWD('>>')
-
-
-def literal_name(x):
- if not isinstance(x, PSLiteral):
- if STRICT:
- raise PSTypeError('Literal required: %r' % x)
- else:
- return str(x)
- return x.name
-
-def keyword_name(x):
- if not isinstance(x, PSKeyword):
- if STRICT:
- raise PSTypeError('Keyword required: %r' % x)
- else:
- return str(x)
- return x.name
-
-
-## PSBaseParser
-##
-EOL = re.compile(r'[\r\n]')
-SPC = re.compile(r'\s')
-NONSPC = re.compile(r'\S')
-HEX = re.compile(r'[0-9a-fA-F]')
-END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]')
-END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]')
-HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.')
-END_NUMBER = re.compile(r'[^0-9]')
-END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')
-END_STRING = re.compile(r'[()\134]')
-OCT_STRING = re.compile(r'[0-7]')
-ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
-
-class PSBaseParser(object):
-
- '''
- Most basic PostScript parser that performs only basic tokenization.
- '''
- BUFSIZ = 4096
-
- def __init__(self, fp):
- self.fp = fp
- self.seek(0)
- return
-
- def __repr__(self):
- return '<PSBaseParser: %r, bufpos=%d>' % (self.fp, self.bufpos)
-
- def flush(self):
- return
-
- def close(self):
- self.flush()
- return
-
- def tell(self):
- return self.bufpos+self.charpos
-
- def poll(self, pos=None, n=80):
- pos0 = self.fp.tell()
- if not pos:
- pos = self.bufpos+self.charpos
- self.fp.seek(pos)
- ##print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n))
- self.fp.seek(pos0)
- return
-
- def seek(self, pos):
- '''
- Seeks the parser to the given position.
- '''
- self.fp.seek(pos)
- # reset the status for nextline()
- self.bufpos = pos
- self.buf = ''
- self.charpos = 0
- # reset the status for nexttoken()
- self.parse1 = self.parse_main
- self.tokens = []
- return
-
- def fillbuf(self):
- if self.charpos < len(self.buf): return
- # fetch next chunk.
- self.bufpos = self.fp.tell()
- self.buf = self.fp.read(self.BUFSIZ)
- if not self.buf:
- raise PSEOF('Unexpected EOF')
- self.charpos = 0
- return
-
- def parse_main(self, s, i):
- m = NONSPC.search(s, i)
- if not m:
- return (self.parse_main, len(s))
- j = m.start(0)
- c = s[j]
- self.tokenstart = self.bufpos+j
- if c == '%':
- self.token = '%'
- return (self.parse_comment, j+1)
- if c == '/':
- self.token = ''
- return (self.parse_literal, j+1)
- if c in '-+' or c.isdigit():
- self.token = c
- return (self.parse_number, j+1)
- if c == '.':
- self.token = c
- return (self.parse_float, j+1)
- if c.isalpha():
- self.token = c
- return (self.parse_keyword, j+1)
- if c == '(':
- self.token = ''
- self.paren = 1
- return (self.parse_string, j+1)
- if c == '<':
- self.token = ''
- return (self.parse_wopen, j+1)
- if c == '>':
- self.token = ''
- return (self.parse_wclose, j+1)
- self.add_token(KWD(c))
- return (self.parse_main, j+1)
-
- def add_token(self, obj):
- self.tokens.append((self.tokenstart, obj))
- return
-
- def parse_comment(self, s, i):
- m = EOL.search(s, i)
- if not m:
- self.token += s[i:]
- return (self.parse_comment, len(s))
- j = m.start(0)
- self.token += s[i:j]
- # We ignore comments.
- #self.tokens.append(self.token)
- return (self.parse_main, j)
-
- def parse_literal(self, s, i):
- m = END_LITERAL.search(s, i)
- if not m:
- self.token += s[i:]
- return (self.parse_literal, len(s))
- j = m.start(0)
- self.token += s[i:j]
- c = s[j]
- if c == '#':
- self.hex = ''
- return (self.parse_literal_hex, j+1)
- self.add_token(LIT(self.token))
- return (self.parse_main, j)
-
- def parse_literal_hex(self, s, i):
- c = s[i]
- if HEX.match(c) and len(self.hex) < 2:
- self.hex += c
- return (self.parse_literal_hex, i+1)
- if self.hex:
- self.token += chr(int(self.hex, 16))
- return (self.parse_literal, i)
-
- def parse_number(self, s, i):
- m = END_NUMBER.search(s, i)
- if not m:
- self.token += s[i:]
- return (self.parse_number, len(s))
- j = m.start(0)
- self.token += s[i:j]
- c = s[j]
- if c == '.':
- self.token += c
- return (self.parse_float, j+1)
- try:
- self.add_token(int(self.token))
- except ValueError:
- pass
- return (self.parse_main, j)
- def parse_float(self, s, i):
- m = END_NUMBER.search(s, i)
- if not m:
- self.token += s[i:]
- return (self.parse_float, len(s))
- j = m.start(0)
- self.token += s[i:j]
- self.add_token(float(self.token))
- return (self.parse_main, j)
-
- def parse_keyword(self, s, i):
- m = END_KEYWORD.search(s, i)
- if not m:
- self.token += s[i:]
- return (self.parse_keyword, len(s))
- j = m.start(0)
- self.token += s[i:j]
- if self.token == 'true':
- token = True
- elif self.token == 'false':
- token = False
- else:
- token = KWD(self.token)
- self.add_token(token)
- return (self.parse_main, j)
-
- def parse_string(self, s, i):
- m = END_STRING.search(s, i)
- if not m:
- self.token += s[i:]
- return (self.parse_string, len(s))
- j = m.start(0)
- self.token += s[i:j]
- c = s[j]
- if c == '\\':
- self.oct = ''
- return (self.parse_string_1, j+1)
- if c == '(':
- self.paren += 1
- self.token += c
- return (self.parse_string, j+1)
- if c == ')':
- self.paren -= 1
- if self.paren:
- self.token += c
- return (self.parse_string, j+1)
- self.add_token(self.token)
- return (self.parse_main, j+1)
- def parse_string_1(self, s, i):
- c = s[i]
- if OCT_STRING.match(c) and len(self.oct) < 3:
- self.oct += c
- return (self.parse_string_1, i+1)
- if self.oct:
- self.token += chr(int(self.oct, 8))
- return (self.parse_string, i)
- if c in ESC_STRING:
- self.token += chr(ESC_STRING[c])
- return (self.parse_string, i+1)
-
- def parse_wopen(self, s, i):
- c = s[i]
- if c.isspace() or HEX.match(c):
- return (self.parse_hexstring, i)
- if c == '<':
- self.add_token(KEYWORD_DICT_BEGIN)
- i += 1
- return (self.parse_main, i)
-
- def parse_wclose(self, s, i):
- c = s[i]
- if c == '>':
- self.add_token(KEYWORD_DICT_END)
- i += 1
- return (self.parse_main, i)
-
- def parse_hexstring(self, s, i):
- m = END_HEX_STRING.search(s, i)
- if not m:
- self.token += s[i:]
- return (self.parse_hexstring, len(s))
- j = m.start(0)
- self.token += s[i:j]
- token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)),
- SPC.sub('', self.token))
- self.add_token(token)
- return (self.parse_main, j)
-
- def nexttoken(self):
- while not self.tokens:
- self.fillbuf()
- (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos)
- token = self.tokens.pop(0)
- return token
-
- def nextline(self):
- '''
- Fetches a next line that ends either with \\r or \\n.
- '''
- linebuf = ''
- linepos = self.bufpos + self.charpos
- eol = False
- while 1:
- self.fillbuf()
- if eol:
- c = self.buf[self.charpos]
- # handle '\r\n'
- if c == '\n':
- linebuf += c
- self.charpos += 1
- break
- m = EOL.search(self.buf, self.charpos)
- if m:
- linebuf += self.buf[self.charpos:m.end(0)]
- self.charpos = m.end(0)
- if linebuf[-1] == '\r':
- eol = True
- else:
- break
- else:
- linebuf += self.buf[self.charpos:]
- self.charpos = len(self.buf)
- return (linepos, linebuf)
-
- def revreadlines(self):
- '''
- Fetches a next line backword. This is used to locate
- the trailers at the end of a file.
- '''
- self.fp.seek(0, 2)
- pos = self.fp.tell()
- buf = ''
- while 0 < pos:
- prevpos = pos
- pos = max(0, pos-self.BUFSIZ)
- self.fp.seek(pos)
- s = self.fp.read(prevpos-pos)
- if not s: break
- while 1:
- n = max(s.rfind('\r'), s.rfind('\n'))
- if n == -1:
- buf = s + buf
- break
- yield s[n:]+buf
- s = s[:n]
- buf = ''
- return
-
-
-## PSStackParser
-##
-class PSStackParser(PSBaseParser):
-
- def __init__(self, fp):
- PSBaseParser.__init__(self, fp)
- self.reset()
- return
-
- def reset(self):
- self.context = []
- self.curtype = None
- self.curstack = []
- self.results = []
- return
-
- def seek(self, pos):
- PSBaseParser.seek(self, pos)
- self.reset()
- return
-
- def push(self, *objs):
- self.curstack.extend(objs)
- return
- def pop(self, n):
- objs = self.curstack[-n:]
- self.curstack[-n:] = []
- return objs
- def popall(self):
- objs = self.curstack
- self.curstack = []
- return objs
- def add_results(self, *objs):
- self.results.extend(objs)
- return
-
- def start_type(self, pos, type):
- self.context.append((pos, self.curtype, self.curstack))
- (self.curtype, self.curstack) = (type, [])
- return
- def end_type(self, type):
- if self.curtype != type:
- raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
- objs = [ obj for (_,obj) in self.curstack ]
- (pos, self.curtype, self.curstack) = self.context.pop()
- return (pos, objs)
-
- def do_keyword(self, pos, token):
- return
-
- def nextobject(self, direct=False):
- '''
- Yields a list of objects: keywords, literals, strings,
- numbers, arrays and dictionaries. Arrays and dictionaries
- are represented as Python sequence and dictionaries.
- '''
- while not self.results:
- (pos, token) = self.nexttoken()
- ##print (pos,token), (self.curtype, self.curstack)
- if (isinstance(token, int) or
- isinstance(token, float) or
- isinstance(token, bool) or
- isinstance(token, str) or
- isinstance(token, PSLiteral)):
- # normal token
- self.push((pos, token))
- elif token == KEYWORD_ARRAY_BEGIN:
- # begin array
- self.start_type(pos, 'a')
- elif token == KEYWORD_ARRAY_END:
- # end array
- try:
- self.push(self.end_type('a'))
- except PSTypeError:
- if STRICT: raise
- elif token == KEYWORD_DICT_BEGIN:
- # begin dictionary
- self.start_type(pos, 'd')
- elif token == KEYWORD_DICT_END:
- # end dictionary
- try:
- (pos, objs) = self.end_type('d')
- if len(objs) % 2 != 0:
- print "Incomplete dictionary construct"
- objs.append("") # this isn't necessary.
- # temporary fix. is this due to rental books?
- # raise PSSyntaxError(
- # 'Invalid dictionary construct: %r' % objs)
- d = dict((literal_name(k), v) \
- for (k,v) in choplist(2, objs))
- self.push((pos, d))
- except PSTypeError:
- if STRICT: raise
- else:
- self.do_keyword(pos, token)
- if self.context:
- continue
- else:
- if direct:
- return self.pop(1)[0]
- self.flush()
- obj = self.results.pop(0)
- return obj
-
-
-LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
-LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl'))
-LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW'))
-LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85'))
-
-
-## PDF Objects
-##
-class PDFObject(PSObject): pass
-
-class PDFException(PSException): pass
-class PDFTypeError(PDFException): pass
-class PDFValueError(PDFException): pass
-class PDFNotImplementedError(PSException): pass
-
-
-## PDFObjRef
-##
-class PDFObjRef(PDFObject):
-
- def __init__(self, doc, objid, genno):
- if objid == 0:
- if STRICT:
- raise PDFValueError('PDF object id cannot be 0.')
- self.doc = doc
- self.objid = objid
- self.genno = genno
- return
-
- def __repr__(self):
- return '<PDFObjRef:%d %d>' % (self.objid, self.genno)
-
- def resolve(self):
- return self.doc.getobj(self.objid)
-
-
-# resolve
-def resolve1(x):
- '''
- Resolve an object. If this is an array or dictionary,
- it may still contains some indirect objects inside.
- '''
- while isinstance(x, PDFObjRef):
- x = x.resolve()
- return x
-
-def resolve_all(x):
- '''
- Recursively resolve X and all the internals.
- Make sure there is no indirect reference within the nested object.
- This procedure might be slow.
- '''
- while isinstance(x, PDFObjRef):
- x = x.resolve()
- if isinstance(x, list):
- x = [ resolve_all(v) for v in x ]
- elif isinstance(x, dict):
- for (k,v) in x.iteritems():
- x[k] = resolve_all(v)
- return x
-
-def decipher_all(decipher, objid, genno, x):
- '''
- Recursively decipher X.
- '''
- if isinstance(x, str):
- return decipher(objid, genno, x)
- decf = lambda v: decipher_all(decipher, objid, genno, v)
- if isinstance(x, list):
- x = [decf(v) for v in x]
- elif isinstance(x, dict):
- x = dict((k, decf(v)) for (k, v) in x.iteritems())
- return x
-
-
-# Type cheking
-def int_value(x):
- x = resolve1(x)
- if not isinstance(x, int):
- if STRICT:
- raise PDFTypeError('Integer required: %r' % x)
- return 0
- return x
-
-def float_value(x):
- x = resolve1(x)
- if not isinstance(x, float):
- if STRICT:
- raise PDFTypeError('Float required: %r' % x)
- return 0.0
- return x
-
-def num_value(x):
- x = resolve1(x)
- if not (isinstance(x, int) or isinstance(x, float)):
- if STRICT:
- raise PDFTypeError('Int or Float required: %r' % x)
- return 0
- return x
-
-def str_value(x):
- x = resolve1(x)
- if not isinstance(x, str):
- if STRICT:
- raise PDFTypeError('String required: %r' % x)
- return ''
- return x
-
-def list_value(x):
- x = resolve1(x)
- if not (isinstance(x, list) or isinstance(x, tuple)):
- if STRICT:
- raise PDFTypeError('List required: %r' % x)
- return []
- return x
-
-def dict_value(x):
- x = resolve1(x)
- if not isinstance(x, dict):
- if STRICT:
- raise PDFTypeError('Dict required: %r' % x)
- return {}
- return x
-
-def stream_value(x):
- x = resolve1(x)
- if not isinstance(x, PDFStream):
- if STRICT:
- raise PDFTypeError('PDFStream required: %r' % x)
- return PDFStream({}, '')
- return x
-
-# ascii85decode(data)
-def ascii85decode(data):
- n = b = 0
- out = ''
- for c in data:
- if '!' <= c and c <= 'u':
- n += 1
- b = b*85+(ord(c)-33)
- if n == 5:
- out += struct.pack('>L',b)
- n = b = 0
- elif c == 'z':
- assert n == 0
- out += '\0\0\0\0'
- elif c == '~':
- if n:
- for _ in range(5-n):
- b = b*85+84
- out += struct.pack('>L',b)[:n-1]
- break
- return out
-
-
-## PDFStream type
-class PDFStream(PDFObject):
- def __init__(self, dic, rawdata, decipher=None):
- length = int_value(dic.get('Length', 0))
- eol = rawdata[length:]
- # quick and dirty fix for false length attribute,
- # might not work if the pdf stream parser has a problem
- if decipher != None and decipher.__name__ == 'decrypt_aes':
- if (len(rawdata) % 16) != 0:
- cutdiv = len(rawdata) // 16
- rawdata = rawdata[:16*cutdiv]
- else:
- if eol in ('\r', '\n', '\r\n'):
- rawdata = rawdata[:length]
-
- self.dic = dic
- self.rawdata = rawdata
- self.decipher = decipher
- self.data = None
- self.decdata = None
- self.objid = None
- self.genno = None
- return
-
- def set_objid(self, objid, genno):
- self.objid = objid
- self.genno = genno
- return
-
- def __repr__(self):
- if self.rawdata:
- return '<PDFStream(%r): raw=%d, %r>' % \
- (self.objid, len(self.rawdata), self.dic)
- else:
- return '<PDFStream(%r): data=%d, %r>' % \
- (self.objid, len(self.data), self.dic)
-
- def decode(self):
- assert self.data is None and self.rawdata is not None
- data = self.rawdata
- if self.decipher:
- # Handle encryption
- data = self.decipher(self.objid, self.genno, data)
- if gen_xref_stm:
- self.decdata = data # keep decrypted data
- if 'Filter' not in self.dic:
- self.data = data
- self.rawdata = None
- ##print self.dict
- return
- filters = self.dic['Filter']
- if not isinstance(filters, list):
- filters = [ filters ]
- for f in filters:
- if f in LITERALS_FLATE_DECODE:
- # will get errors if the document is encrypted.
- data = zlib.decompress(data)
- elif f in LITERALS_LZW_DECODE:
- data = ''.join(LZWDecoder(StringIO(data)).run())
- elif f in LITERALS_ASCII85_DECODE:
- data = ascii85decode(data)
- elif f == LITERAL_CRYPT:
- raise PDFNotImplementedError('/Crypt filter is unsupported')
- else:
- raise PDFNotImplementedError('Unsupported filter: %r' % f)
- # apply predictors
- if 'DP' in self.dic:
- params = self.dic['DP']
- else:
- params = self.dic.get('DecodeParms', {})
- if 'Predictor' in params:
- pred = int_value(params['Predictor'])
- if pred:
- if pred != 12:
- raise PDFNotImplementedError(
- 'Unsupported predictor: %r' % pred)
- if 'Columns' not in params:
- raise PDFValueError(
- 'Columns undefined for predictor=12')
- columns = int_value(params['Columns'])
- buf = ''
- ent0 = '\x00' * columns
- for i in xrange(0, len(data), columns+1):
- pred = data[i]
- ent1 = data[i+1:i+1+columns]
- if pred == '\x02':
- ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \
- for (a,b) in zip(ent0,ent1))
- buf += ent1
- ent0 = ent1
- data = buf
- self.data = data
- self.rawdata = None
- return
-
- def get_data(self):
- if self.data is None:
- self.decode()
- return self.data
-
- def get_rawdata(self):
- return self.rawdata
-
- def get_decdata(self):
- if self.decdata is not None:
- return self.decdata
- data = self.rawdata
- if self.decipher and data:
- # Handle encryption
- data = self.decipher(self.objid, self.genno, data)
+ return (AES, RSA)
+
+AES, RSA = _load_crypto()
+
+META_NAMES = ('mimetype', 'META-INF/rights.xml', 'META-INF/encryption.xml')
+NSMAP = {'adept': 'http://ns.adobe.com/adept',
+ 'enc': 'http://www.w3.org/2001/04/xmlenc#'}
+
+class Decryptor(object):
+ def __init__(self, bookkey, encryption):
+ enc = lambda tag: '{%s}%s' % (NSMAP['enc'], tag)
+ self._aes = AES(bookkey)
+ encryption = etree.fromstring(encryption)
+ self._encrypted = encrypted = set()
+ expr = './%s/%s/%s' % (enc('EncryptedData'), enc('CipherData'),
+ enc('CipherReference'))
+ for elem in encryption.findall(expr):
+ path = elem.get('URI', None)
+ if path is not None:
+ path = path.encode('utf-8')
+ encrypted.add(path)
+
+ def decompress(self, bytes):
+ dc = zlib.decompressobj(-15)
+ bytes = dc.decompress(bytes)
+ ex = dc.decompress('Z') + dc.flush()
+ if ex:
+ bytes = bytes + ex
+ return bytes
+
+ def decrypt(self, path, data):
+ if path in self._encrypted:
+ data = self._aes.decrypt(data)[16:]
+ data = data[:-ord(data[-1])]
+ data = self.decompress(data)
return data
-
-## PDF Exceptions
-##
-class PDFSyntaxError(PDFException): pass
-class PDFNoValidXRef(PDFSyntaxError): pass
-class PDFEncryptionError(PDFException): pass
-class PDFPasswordIncorrect(PDFEncryptionError): pass
-
-# some predefined literals and keywords.
-LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm')
-LITERAL_XREF = PSLiteralTable.intern('XRef')
-LITERAL_PAGE = PSLiteralTable.intern('Page')
-LITERAL_PAGES = PSLiteralTable.intern('Pages')
-LITERAL_CATALOG = PSLiteralTable.intern('Catalog')
-
-
-## XRefs
-##
-
-## PDFXRef
-##
-class PDFXRef(object):
-
- def __init__(self):
- self.offsets = None
- return
-
- def __repr__(self):
- return '<PDFXRef: objs=%d>' % len(self.offsets)
-
- def objids(self):
- return self.offsets.iterkeys()
-
- def load(self, parser):
- self.offsets = {}
- while 1:
- try:
- (pos, line) = parser.nextline()
- except PSEOF:
- raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
- if not line:
- raise PDFNoValidXRef('Premature eof: %r' % parser)
- if line.startswith('trailer'):
- parser.seek(pos)
- break
- f = line.strip().split(' ')
- if len(f) != 2:
- raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
- try:
- (start, nobjs) = map(int, f)
- except ValueError:
- raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
- for objid in xrange(start, start+nobjs):
- try:
- (_, line) = parser.nextline()
- except PSEOF:
- raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
- f = line.strip().split(' ')
- if len(f) != 3:
- raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
- (pos, genno, use) = f
- if use != 'n': continue
- self.offsets[objid] = (int(genno), int(pos))
- self.load_trailer(parser)
- return
-
- KEYWORD_TRAILER = PSKeywordTable.intern('trailer')
- def load_trailer(self, parser):
+# check file to make check whether it's probably an Adobe Adept encrypted ePub
+def adeptBook(inpath):
+ with closing(ZipFile(open(inpath, 'rb'))) as inf:
+ namelist = set(inf.namelist())
+ if 'META-INF/rights.xml' not in namelist or \
+ 'META-INF/encryption.xml' not in namelist:
+ return False
try:
- (_,kwd) = parser.nexttoken()
- assert kwd is self.KEYWORD_TRAILER
- (_,dic) = parser.nextobject(direct=True)
- except PSEOF:
- x = parser.pop(1)
- if not x:
- raise PDFNoValidXRef('Unexpected EOF - file corrupted')
- (_,dic) = x[0]
- self.trailer = dict_value(dic)
- return
-
- def getpos(self, objid):
+ rights = etree.fromstring(inf.read('META-INF/rights.xml'))
+ adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
+ expr = './/%s' % (adept('encryptedKey'),)
+ bookkey = ''.join(rights.findtext(expr))
+ if len(bookkey) == 172:
+ return True
+ except:
+ # if we couldn't check, assume it is
+ return True
+ return False
+
+def decryptBook(userkey, inpath, outpath):
+ if AES is None:
+ raise ADEPTError(u"PyCrypto or OpenSSL must be installed.")
+ rsa = RSA(userkey)
+ with closing(ZipFile(open(inpath, 'rb'))) as inf:
+ namelist = set(inf.namelist())
+ if 'META-INF/rights.xml' not in namelist or \
+ 'META-INF/encryption.xml' not in namelist:
+ print u"{0:s} is DRM-free.".format(os.path.basename(inpath))
+ return 1
+ for name in META_NAMES:
+ namelist.remove(name)
try:
- (genno, pos) = self.offsets[objid]
- except KeyError:
- raise
- return (None, pos)
-
-
-## PDFXRefStream
-##
-class PDFXRefStream(object):
-
- def __init__(self):
- self.index = None
- self.data = None
- self.entlen = None
- self.fl1 = self.fl2 = self.fl3 = None
- return
-
- def __repr__(self):
- return '<PDFXRef: objids=%s>' % self.index
-
- def objids(self):
- for first, size in self.index:
- for objid in xrange(first, first + size):
- yield objid
-
- def load(self, parser, debug=0):
- (_,objid) = parser.nexttoken() # ignored
- (_,genno) = parser.nexttoken() # ignored
- (_,kwd) = parser.nexttoken()
- (_,stream) = parser.nextobject()
- if not isinstance(stream, PDFStream) or \
- stream.dic['Type'] is not LITERAL_XREF:
- raise PDFNoValidXRef('Invalid PDF stream spec.')
- size = stream.dic['Size']
- index = stream.dic.get('Index', (0,size))
- self.index = zip(islice(index, 0, None, 2),
- islice(index, 1, None, 2))
- (self.fl1, self.fl2, self.fl3) = stream.dic['W']
- self.data = stream.get_data()
- self.entlen = self.fl1+self.fl2+self.fl3
- self.trailer = stream.dic
- return
-
- def getpos(self, objid):
- offset = 0
- for first, size in self.index:
- if first <= objid and objid < (first + size):
- break
- offset += size
- else:
- raise KeyError(objid)
- i = self.entlen * ((objid - first) + offset)
- ent = self.data[i:i+self.entlen]
- f1 = nunpack(ent[:self.fl1], 1)
- if f1 == 1:
- pos = nunpack(ent[self.fl1:self.fl1+self.fl2])
- genno = nunpack(ent[self.fl1+self.fl2:])
- return (None, pos)
- elif f1 == 2:
- objid = nunpack(ent[self.fl1:self.fl1+self.fl2])
- index = nunpack(ent[self.fl1+self.fl2:])
- return (objid, index)
- # this is a free object
- raise KeyError(objid)
-
-
-## PDFDocument
-##
-## A PDFDocument object represents a PDF document.
-## Since a PDF file is usually pretty big, normally it is not loaded
-## at once. Rather it is parsed dynamically as processing goes.
-## A PDF parser is associated with the document.
-##
-class PDFDocument(object):
-
- def __init__(self):
- self.xrefs = []
- self.objs = {}
- self.parsed_objs = {}
- self.root = None
- self.catalog = None
- self.parser = None
- self.encryption = None
- self.decipher = None
- return
-
- # set_parser(parser)
- # Associates the document with an (already initialized) parser object.
- def set_parser(self, parser):
- if self.parser: return
- self.parser = parser
- # The document is set to be temporarily ready during collecting
- # all the basic information about the document, e.g.
- # the header, the encryption information, and the access rights
- # for the document.
- self.ready = True
- # Retrieve the information of each header that was appended
- # (maybe multiple times) at the end of the document.
- self.xrefs = parser.read_xref()
- for xref in self.xrefs:
- trailer = xref.trailer
- if not trailer: continue
-
- # If there's an encryption info, remember it.
- if 'Encrypt' in trailer:
- #assert not self.encryption
+ rights = etree.fromstring(inf.read('META-INF/rights.xml'))
+ adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
+ expr = './/%s' % (adept('encryptedKey'),)
+ bookkey = ''.join(rights.findtext(expr))
+ if len(bookkey) != 172:
+ print u"{0:s} is not a secure Adobe Adept ePub.".format(os.path.basename(inpath))
+ return 1
+ bookkey = rsa.decrypt(bookkey.decode('base64'))
+ # Padded as per RSAES-PKCS1-v1_5
+ if bookkey[-17] != '\x00':
+ print u"Could not decrypt {0:s}. Wrong key".format(os.path.basename(inpath))
+ return 2
+ encryption = inf.read('META-INF/encryption.xml')
+ decryptor = Decryptor(bookkey[-16:], encryption)
+ kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
+ with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf:
+ zi = ZipInfo('mimetype')
+ zi.compress_type=ZIP_STORED
try:
- self.encryption = (list_value(trailer['ID']),
- dict_value(trailer['Encrypt']))
- # fix for bad files
+ # if the mimetype is present, get its info, including time-stamp
+ oldzi = inf.getinfo('mimetype')
+ # copy across fields to be preserved
+ zi.date_time = oldzi.date_time
+ zi.comment = oldzi.comment
+ zi.extra = oldzi.extra
+ zi.internal_attr = oldzi.internal_attr
+ # external attributes are dependent on the create system, so copy both.
+ zi.external_attr = oldzi.external_attr
+ zi.create_system = oldzi.create_system
except:
- self.encryption = ('ffffffffffffffffffffffffffffffffffff',
- dict_value(trailer['Encrypt']))
- if 'Root' in trailer:
- self.set_root(dict_value(trailer['Root']))
- break
- else:
- raise PDFSyntaxError('No /Root object! - Is this really a PDF?')
- # The document is set to be non-ready again, until all the
- # proper initialization (asking the password key and
- # verifying the access permission, so on) is finished.
- self.ready = False
- return
-
- # set_root(root)
- # Set the Root dictionary of the document.
- # Each PDF file must have exactly one /Root dictionary.
- def set_root(self, root):
- self.root = root
- self.catalog = dict_value(self.root)
- if self.catalog.get('Type') is not LITERAL_CATALOG:
- if STRICT:
- raise PDFSyntaxError('Catalog not found!')
- return
- # initialize(password='')
- # Perform the initialization with a given password.
- # This step is mandatory even if there's no password associated
- # with the document.
- def initialize(self, password=''):
- if not self.encryption:
- self.is_printable = self.is_modifiable = self.is_extractable = True
- self.ready = True
- return
- (docid, param) = self.encryption
- type = literal_name(param['Filter'])
- if type == 'Adobe.APS':
- return self.initialize_adobe_ps(password, docid, param)
- if type == 'Standard':
- return self.initialize_standard(password, docid, param)
- if type == 'EBX_HANDLER':
- return self.initialize_ebx(password, docid, param)
- raise PDFEncryptionError('Unknown filter: param=%r' % param)
-
- def initialize_adobe_ps(self, password, docid, param):
- global KEYFILEPATH
- self.decrypt_key = self.genkey_adobe_ps(param)
- self.genkey = self.genkey_v4
- self.decipher = self.decrypt_aes
- self.ready = True
- return
-
- def genkey_adobe_ps(self, param):
- # nice little offline principal keys dictionary
- # global static principal key for German Onleihe / Bibliothek Digital
- principalkeys = { 'bibliothek-digital.de': 'rRwGv2tbpKov1krvv7PO0ws9S436/lArPlfipz5Pqhw='.decode('base64')}
- self.is_printable = self.is_modifiable = self.is_extractable = True
- length = int_value(param.get('Length', 0)) / 8
- edcdata = str_value(param.get('EDCData')).decode('base64')
- pdrllic = str_value(param.get('PDRLLic')).decode('base64')
- pdrlpol = str_value(param.get('PDRLPol')).decode('base64')
- edclist = []
- for pair in edcdata.split('\n'):
- edclist.append(pair)
- # principal key request
- for key in principalkeys:
- if key in pdrllic:
- principalkey = principalkeys[key]
- else:
- raise ADEPTError('Cannot find principal key for this pdf')
- shakey = SHA256(principalkey)
- ivector = 16 * chr(0)
- plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64'))
- if plaintext[-16:] != 16 * chr(16):
- raise ADEPTError('Offlinekey cannot be decrypted, aborting ...')
- pdrlpol = AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol)
- if ord(pdrlpol[-1]) < 1 or ord(pdrlpol[-1]) > 16:
- raise ADEPTError('Could not decrypt PDRLPol, aborting ...')
- else:
- cutter = -1 * ord(pdrlpol[-1])
- pdrlpol = pdrlpol[:cutter]
- return plaintext[:16]
-
- PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \
- '\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
- # experimental aes pw support
- def initialize_standard(self, password, docid, param):
- # copy from a global variable
- V = int_value(param.get('V', 0))
- if (V <=0 or V > 4):
- raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
- length = int_value(param.get('Length', 40)) # Key length (bits)
- O = str_value(param['O'])
- R = int_value(param['R']) # Revision
- if 5 <= R:
- raise PDFEncryptionError('Unknown revision: %r' % R)
- U = str_value(param['U'])
- P = int_value(param['P'])
- try:
- EncMetadata = str_value(param['EncryptMetadata'])
- except:
- EncMetadata = 'True'
- self.is_printable = bool(P & 4)
- self.is_modifiable = bool(P & 8)
- self.is_extractable = bool(P & 16)
- self.is_annotationable = bool(P & 32)
- self.is_formsenabled = bool(P & 256)
- self.is_textextractable = bool(P & 512)
- self.is_assemblable = bool(P & 1024)
- self.is_formprintable = bool(P & 2048)
- # Algorithm 3.2
- password = (password+self.PASSWORD_PADDING)[:32] # 1
- hash = hashlib.md5(password) # 2
- hash.update(O) # 3
- hash.update(struct.pack('<l', P)) # 4
- hash.update(docid[0]) # 5
- # aes special handling if metadata isn't encrypted
- if EncMetadata == ('False' or 'false'):
- hash.update('ffffffff'.decode('hex'))
- if 5 <= R:
- # 8
- for _ in xrange(50):
- hash = hashlib.md5(hash.digest()[:length/8])
- key = hash.digest()[:length/8]
- if R == 2:
- # Algorithm 3.4
- u1 = ARC4.new(key).decrypt(password)
- elif R >= 3:
- # Algorithm 3.5
- hash = hashlib.md5(self.PASSWORD_PADDING) # 2
- hash.update(docid[0]) # 3
- x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4
- for i in xrange(1,19+1):
- k = ''.join( chr(ord(c) ^ i) for c in key )
- x = ARC4.new(k).decrypt(x)
- u1 = x+x # 32bytes total
- if R == 2:
- is_authenticated = (u1 == U)
- else:
- is_authenticated = (u1[:16] == U[:16])
- if not is_authenticated:
- raise ADEPTError('Password is not correct.')
- self.decrypt_key = key
- # genkey method
- if V == 1 or V == 2:
- self.genkey = self.genkey_v2
- elif V == 3:
- self.genkey = self.genkey_v3
- elif V == 4:
- self.genkey = self.genkey_v2
- #self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2
- # rc4
- if V != 4:
- self.decipher = self.decipher_rc4 # XXX may be AES
- # aes
- elif V == 4 and Length == 128:
- elf.decipher = self.decipher_aes
- elif V == 4 and Length == 256:
- raise PDFNotImplementedError('AES256 encryption is currently unsupported')
- self.ready = True
- return
-
- def initialize_ebx(self, password, docid, param):
- self.is_printable = self.is_modifiable = self.is_extractable = True
- rsa = RSA(password)
- length = int_value(param.get('Length', 0)) / 8
- rights = str_value(param.get('ADEPT_LICENSE')).decode('base64')
- rights = zlib.decompress(rights, -15)
- rights = etree.fromstring(rights)
- expr = './/{http://ns.adobe.com/adept}encryptedKey'
- bookkey = ''.join(rights.findtext(expr)).decode('base64')
- bookkey = rsa.decrypt(bookkey)
- if bookkey[0] != '\x02':
- raise ADEPTError('error decrypting book session key')
- index = bookkey.index('\0') + 1
- bookkey = bookkey[index:]
- ebx_V = int_value(param.get('V', 4))
- ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6))
- # added because of improper booktype / decryption book session key errors
- if length > 0:
- if len(bookkey) == length:
- if ebx_V == 3:
- V = 3
- else:
- V = 2
- elif len(bookkey) == length + 1:
- V = ord(bookkey[0])
- bookkey = bookkey[1:]
- else:
- print "ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type)
- print "length is %d and len(bookkey) is %d" % (length, len(bookkey))
- print "bookkey[0] is %d" % ord(bookkey[0])
- raise ADEPTError('error decrypting book session key - mismatched length')
- else:
- # proper length unknown try with whatever you have
- print "ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type)
- print "length is %d and len(bookkey) is %d" % (length, len(bookkey))
- print "bookkey[0] is %d" % ord(bookkey[0])
- if ebx_V == 3:
- V = 3
- else:
- V = 2
- self.decrypt_key = bookkey
- self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2
- self.decipher = self.decrypt_rc4
- self.ready = True
- return
-
- # genkey functions
- def genkey_v2(self, objid, genno):
- objid = struct.pack('<L', objid)[:3]
- genno = struct.pack('<L', genno)[:2]
- key = self.decrypt_key + objid + genno
- hash = hashlib.md5(key)
- key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
- return key
-
- def genkey_v3(self, objid, genno):
- objid = struct.pack('<L', objid ^ 0x3569ac)
- genno = struct.pack('<L', genno ^ 0xca96)
- key = self.decrypt_key
- key += objid[0] + genno[0] + objid[1] + genno[1] + objid[2] + 'sAlT'
- hash = hashlib.md5(key)
- key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
- return key
-
- # aes v2 and v4 algorithm
- def genkey_v4(self, objid, genno):
- objid = struct.pack('<L', objid)[:3]
- genno = struct.pack('<L', genno)[:2]
- key = self.decrypt_key + objid + genno + 'sAlT'
- hash = hashlib.md5(key)
- key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
- return key
-
- def decrypt_aes(self, objid, genno, data):
- key = self.genkey(objid, genno)
- ivector = data[:16]
- data = data[16:]
- plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data)
- # remove pkcs#5 aes padding
- cutter = -1 * ord(plaintext[-1])
- #print cutter
- plaintext = plaintext[:cutter]
- return plaintext
-
- def decrypt_aes256(self, objid, genno, data):
- key = self.genkey(objid, genno)
- ivector = data[:16]
- data = data[16:]
- plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data)
- # remove pkcs#5 aes padding
- cutter = -1 * ord(plaintext[-1])
- #print cutter
- plaintext = plaintext[:cutter]
- return plaintext
-
- def decrypt_rc4(self, objid, genno, data):
- key = self.genkey(objid, genno)
- return ARC4.new(key).decrypt(data)
-
-
- KEYWORD_OBJ = PSKeywordTable.intern('obj')
-
- def getobj(self, objid):
- if not self.ready:
- raise PDFException('PDFDocument not initialized')
- #assert self.xrefs
- if objid in self.objs:
- genno = 0
- obj = self.objs[objid]
- else:
- for xref in self.xrefs:
- try:
- (stmid, index) = xref.getpos(objid)
- break
- except KeyError:
pass
- else:
- #if STRICT:
- # raise PDFSyntaxError('Cannot locate objid=%r' % objid)
- return None
- if stmid:
- if gen_xref_stm:
- return PDFObjStmRef(objid, stmid, index)
- # Stuff from pdfminer: extract objects from object stream
- stream = stream_value(self.getobj(stmid))
- if stream.dic.get('Type') is not LITERAL_OBJSTM:
- if STRICT:
- raise PDFSyntaxError('Not a stream object: %r' % stream)
- try:
- n = stream.dic['N']
- except KeyError:
- if STRICT:
- raise PDFSyntaxError('N is not defined: %r' % stream)
- n = 0
-
- if stmid in self.parsed_objs:
- objs = self.parsed_objs[stmid]
- else:
- parser = PDFObjStrmParser(stream.get_data(), self)
- objs = []
+ outf.writestr(zi, inf.read('mimetype'))
+ for path in namelist:
+ data = inf.read(path)
+ zi = ZipInfo(path)
+ zi.compress_type=ZIP_DEFLATED
try:
- while 1:
- (_,obj) = parser.nextobject()
- objs.append(obj)
- except PSEOF:
+ # get the file info, including time-stamp
+ oldzi = inf.getinfo(path)
+ # copy across useful fields
+ zi.date_time = oldzi.date_time
+ zi.comment = oldzi.comment
+ zi.extra = oldzi.extra
+ zi.internal_attr = oldzi.internal_attr
+ # external attributes are dependent on the create system, so copy both.
+ zi.external_attr = oldzi.external_attr
+ zi.create_system = oldzi.create_system
+ except:
pass
- self.parsed_objs[stmid] = objs
- genno = 0
- i = n*2+index
- try:
- obj = objs[i]
- except IndexError:
- raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
- if isinstance(obj, PDFStream):
- obj.set_objid(objid, 0)
- else:
- self.parser.seek(index)
- (_,objid1) = self.parser.nexttoken() # objid
- (_,genno) = self.parser.nexttoken() # genno
- #assert objid1 == objid, (objid, objid1)
- (_,kwd) = self.parser.nexttoken()
- # #### hack around malformed pdf files
- # assert objid1 == objid, (objid, objid1)
-## if objid1 != objid:
-## x = []
-## while kwd is not self.KEYWORD_OBJ:
-## (_,kwd) = self.parser.nexttoken()
-## x.append(kwd)
-## if x:
-## objid1 = x[-2]
-## genno = x[-1]
-##
- if kwd is not self.KEYWORD_OBJ:
- raise PDFSyntaxError(
- 'Invalid object spec: offset=%r' % index)
- (_,obj) = self.parser.nextobject()
- if isinstance(obj, PDFStream):
- obj.set_objid(objid, genno)
- if self.decipher:
- obj = decipher_all(self.decipher, objid, genno, obj)
- self.objs[objid] = obj
- return obj
-
-
-class PDFObjStmRef(object):
- maxindex = 0
- def __init__(self, objid, stmid, index):
- self.objid = objid
- self.stmid = stmid
- self.index = index
- if index > PDFObjStmRef.maxindex:
- PDFObjStmRef.maxindex = index
-
-
-## PDFParser
-##
-class PDFParser(PSStackParser):
-
- def __init__(self, doc, fp):
- PSStackParser.__init__(self, fp)
- self.doc = doc
- self.doc.set_parser(self)
- return
-
- def __repr__(self):
- return '<PDFParser>'
-
- KEYWORD_R = PSKeywordTable.intern('R')
- KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj')
- KEYWORD_STREAM = PSKeywordTable.intern('stream')
- KEYWORD_XREF = PSKeywordTable.intern('xref')
- KEYWORD_STARTXREF = PSKeywordTable.intern('startxref')
- def do_keyword(self, pos, token):
- if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
- self.add_results(*self.pop(1))
- return
- if token is self.KEYWORD_ENDOBJ:
- self.add_results(*self.pop(4))
- return
-
- if token is self.KEYWORD_R:
- # reference to indirect object
- try:
- ((_,objid), (_,genno)) = self.pop(2)
- (objid, genno) = (int(objid), int(genno))
- obj = PDFObjRef(self.doc, objid, genno)
- self.push((pos, obj))
- except PSSyntaxError:
- pass
- return
-
- if token is self.KEYWORD_STREAM:
- # stream object
- ((_,dic),) = self.pop(1)
- dic = dict_value(dic)
- try:
- objlen = int_value(dic['Length'])
- except KeyError:
- if STRICT:
- raise PDFSyntaxError('/Length is undefined: %r' % dic)
- objlen = 0
- self.seek(pos)
- try:
- (_, line) = self.nextline() # 'stream'
- except PSEOF:
- if STRICT:
- raise PDFSyntaxError('Unexpected EOF')
- return
- pos += len(line)
- self.fp.seek(pos)
- data = self.fp.read(objlen)
- self.seek(pos+objlen)
- while 1:
- try:
- (linepos, line) = self.nextline()
- except PSEOF:
- if STRICT:
- raise PDFSyntaxError('Unexpected EOF')
- break
- if 'endstream' in line:
- i = line.index('endstream')
- objlen += i
- data += line[:i]
- break
- objlen += len(line)
- data += line
- self.seek(pos+objlen)
- obj = PDFStream(dic, data, self.doc.decipher)
- self.push((pos, obj))
- return
-
- # others
- self.push((pos, token))
- return
-
- def find_xref(self):
- # search the last xref table by scanning the file backwards.
- prev = None
- for line in self.revreadlines():
- line = line.strip()
- if line == 'startxref': break
- if line:
- prev = line
- else:
- raise PDFNoValidXRef('Unexpected EOF')
- return int(prev)
-
- # read xref table
- def read_xref_from(self, start, xrefs):
- self.seek(start)
- self.reset()
- try:
- (pos, token) = self.nexttoken()
- except PSEOF:
- raise PDFNoValidXRef('Unexpected EOF')
- if isinstance(token, int):
- # XRefStream: PDF-1.5
- if GEN_XREF_STM == 1:
- global gen_xref_stm
- gen_xref_stm = True
- self.seek(pos)
- self.reset()
- xref = PDFXRefStream()
- xref.load(self)
- else:
- if token is not self.KEYWORD_XREF:
- raise PDFNoValidXRef('xref not found: pos=%d, token=%r' %
- (pos, token))
- self.nextline()
- xref = PDFXRef()
- xref.load(self)
- xrefs.append(xref)
- trailer = xref.trailer
- if 'XRefStm' in trailer:
- pos = int_value(trailer['XRefStm'])
- self.read_xref_from(pos, xrefs)
- if 'Prev' in trailer:
- # find previous xref
- pos = int_value(trailer['Prev'])
- self.read_xref_from(pos, xrefs)
- return
-
- # read xref tables and trailers
- def read_xref(self):
- xrefs = []
- trailerpos = None
- try:
- pos = self.find_xref()
- self.read_xref_from(pos, xrefs)
- except PDFNoValidXRef:
- # fallback
- self.seek(0)
- pat = re.compile(r'^(\d+)\s+(\d+)\s+obj\b')
- offsets = {}
- xref = PDFXRef()
- while 1:
- try:
- (pos, line) = self.nextline()
- except PSEOF:
- break
- if line.startswith('trailer'):
- trailerpos = pos # remember last trailer
- m = pat.match(line)
- if not m: continue
- (objid, genno) = m.groups()
- offsets[int(objid)] = (0, pos)
- if not offsets: raise
- xref.offsets = offsets
- if trailerpos:
- self.seek(trailerpos)
- xref.load_trailer(self)
- xrefs.append(xref)
- return xrefs
-
-## PDFObjStrmParser
-##
-class PDFObjStrmParser(PDFParser):
-
- def __init__(self, data, doc):
- PSStackParser.__init__(self, StringIO(data))
- self.doc = doc
- return
-
- def flush(self):
- self.add_results(*self.popall())
- return
-
- KEYWORD_R = KWD('R')
- def do_keyword(self, pos, token):
- if token is self.KEYWORD_R:
- # reference to indirect object
- try:
- ((_,objid), (_,genno)) = self.pop(2)
- (objid, genno) = (int(objid), int(genno))
- obj = PDFObjRef(self.doc, objid, genno)
- self.push((pos, obj))
- except PSSyntaxError:
- pass
- return
- # others
- self.push((pos, token))
- return
-
-###
-### My own code, for which there is none else to blame
-
-class PDFSerializer(object):
- def __init__(self, inf, userkey):
- global GEN_XREF_STM, gen_xref_stm
- gen_xref_stm = GEN_XREF_STM > 1
- self.version = inf.read(8)
- inf.seek(0)
- self.doc = doc = PDFDocument()
- parser = PDFParser(doc, inf)
- doc.initialize(userkey)
- self.objids = objids = set()
- for xref in reversed(doc.xrefs):
- trailer = xref.trailer
- for objid in xref.objids():
- objids.add(objid)
- trailer = dict(trailer)
- trailer.pop('Prev', None)
- trailer.pop('XRefStm', None)
- if 'Encrypt' in trailer:
- objids.remove(trailer.pop('Encrypt').objid)
- self.trailer = trailer
-
- def dump(self, outf):
- self.outf = outf
- self.write(self.version)
- self.write('\n%\xe2\xe3\xcf\xd3\n')
- doc = self.doc
- objids = self.objids
- xrefs = {}
- maxobj = max(objids)
- trailer = dict(self.trailer)
- trailer['Size'] = maxobj + 1
- for objid in objids:
- obj = doc.getobj(objid)
- if isinstance(obj, PDFObjStmRef):
- xrefs[objid] = obj
- continue
- if obj is not None:
- try:
- genno = obj.genno
- except AttributeError:
- genno = 0
- xrefs[objid] = (self.tell(), genno)
- self.serialize_indirect(objid, obj)
- startxref = self.tell()
-
- if not gen_xref_stm:
- self.write('xref\n')
- self.write('0 %d\n' % (maxobj + 1,))
- for objid in xrange(0, maxobj + 1):
- if objid in xrefs:
- # force the genno to be 0
- self.write("%010d 00000 n \n" % xrefs[objid][0])
- else:
- self.write("%010d %05d f \n" % (0, 65535))
-
- self.write('trailer\n')
- self.serialize_object(trailer)
- self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
-
- else: # Generate crossref stream.
-
- # Calculate size of entries
- maxoffset = max(startxref, maxobj)
- maxindex = PDFObjStmRef.maxindex
- fl2 = 2
- power = 65536
- while maxoffset >= power:
- fl2 += 1
- power *= 256
- fl3 = 1
- power = 256
- while maxindex >= power:
- fl3 += 1
- power *= 256
-
- index = []
- first = None
- prev = None
- data = []
- # Put the xrefstream's reference in itself
- startxref = self.tell()
- maxobj += 1
- xrefs[maxobj] = (startxref, 0)
- for objid in sorted(xrefs):
- if first is None:
- first = objid
- elif objid != prev + 1:
- index.extend((first, prev - first + 1))
- first = objid
- prev = objid
- objref = xrefs[objid]
- if isinstance(objref, PDFObjStmRef):
- f1 = 2
- f2 = objref.stmid
- f3 = objref.index
- else:
- f1 = 1
- f2 = objref[0]
- # we force all generation numbers to be 0
- # f3 = objref[1]
- f3 = 0
-
- data.append(struct.pack('>B', f1))
- data.append(struct.pack('>L', f2)[-fl2:])
- data.append(struct.pack('>L', f3)[-fl3:])
- index.extend((first, prev - first + 1))
- data = zlib.compress(''.join(data))
- dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
- 'W': [1, fl2, fl3], 'Length': len(data),
- 'Filter': LITERALS_FLATE_DECODE[0],
- 'Root': trailer['Root'],}
- if 'Info' in trailer:
- dic['Info'] = trailer['Info']
- xrefstm = PDFStream(dic, data)
- self.serialize_indirect(maxobj, xrefstm)
- self.write('startxref\n%d\n%%%%EOF' % startxref)
- def write(self, data):
- self.outf.write(data)
- self.last = data[-1:]
-
- def tell(self):
- return self.outf.tell()
-
- def escape_string(self, string):
- string = string.replace('\\', '\\\\')
- string = string.replace('\n', r'\n')
- string = string.replace('(', r'\(')
- string = string.replace(')', r'\)')
- # get rid of ciando id
- regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}')
- if regularexp.match(string): return ('http://www.ciando.com')
- return string
-
- def serialize_object(self, obj):
- if isinstance(obj, dict):
- # Correct malformed Mac OS resource forks for Stanza
- if 'ResFork' in obj and 'Type' in obj and 'Subtype' not in obj \
- and isinstance(obj['Type'], int):
- obj['Subtype'] = obj['Type']
- del obj['Type']
- # end - hope this doesn't have bad effects
- self.write('<<')
- for key, val in obj.items():
- self.write('/%s' % key)
- self.serialize_object(val)
- self.write('>>')
- elif isinstance(obj, list):
- self.write('[')
- for val in obj:
- self.serialize_object(val)
- self.write(']')
- elif isinstance(obj, str):
- self.write('(%s)' % self.escape_string(obj))
- elif isinstance(obj, bool):
- if self.last.isalnum():
- self.write(' ')
- self.write(str(obj).lower())
- elif isinstance(obj, (int, long, float)):
- if self.last.isalnum():
- self.write(' ')
- self.write(str(obj))
- elif isinstance(obj, PDFObjRef):
- if self.last.isalnum():
- self.write(' ')
- self.write('%d %d R' % (obj.objid, 0))
- elif isinstance(obj, PDFStream):
- ### If we don't generate cross ref streams the object streams
- ### are no longer useful, as we have extracted all objects from
- ### them. Therefore leave them out from the output.
- if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm:
- self.write('(deleted)')
- else:
- data = obj.get_decdata()
- self.serialize_object(obj.dic)
- self.write('stream\n')
- self.write(data)
- self.write('\nendstream')
- else:
- data = str(obj)
- if data[0].isalnum() and self.last.isalnum():
- self.write(' ')
- self.write(data)
-
- def serialize_indirect(self, objid, obj):
- self.write('%d 0 obj' % (objid,))
- self.serialize_object(obj)
- if self.last.isalnum():
- self.write('\n')
- self.write('endobj\n')
-
-
-
-
-def decryptBook(userkey, inpath, outpath):
- if RSA is None:
- raise ADEPTError(u"PyCrypto or OpenSSL must be installed.")
- with open(inpath, 'rb') as inf:
- try:
- serializer = PDFSerializer(inf, userkey)
+ outf.writestr(zi, decryptor.decrypt(path, data))
except:
- print u"Error serializing pdf {0}. Probably wrong key.".format(os.path.basename(inpath))
+ print u"Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc())
return 2
- # hope this will fix the 'bad file descriptor' problem
- with open(outpath, 'wb') as outf:
- # help construct to make sure the method runs to the end
- try:
- serializer.dump(outf)
- except Exception, e:
- print u"error writing pdf: {0}".format(e.args[0])
- return 2
return 0
@@ -2197,7 +466,7 @@ def cli_main():
argv=unicode_argv()
progname = os.path.basename(argv[0])
if len(argv) != 4:
- print u"usage: {0} <keyfile.der> <inbook.pdf> <outbook.pdf>".format(progname)
+ print u"usage: {0} <keyfile.der> <inbook.epub> <outbook.epub>".format(progname)
return 1
keypath, inpath, outpath = argv[1:]
userkey = open(keypath,'rb').read()
@@ -2206,7 +475,6 @@ def cli_main():
print u"Successfully decrypted {0:s} as {1:s}".format(os.path.basename(inpath),os.path.basename(outpath))
return result
-
def gui_main():
try:
import Tkinter
@@ -2266,8 +534,8 @@ def gui_main():
def get_inpath(self):
inpath = tkFileDialog.askopenfilename(
- parent=None, title=u"Select ADEPT-encrypted PDF file to decrypt",
- defaultextension=u".pdf", filetypes=[('PDF files', '.pdf')])
+ parent=None, title=u"Select ADEPT-encrypted ePub file to decrypt",
+ defaultextension=u".epub", filetypes=[('ePub files', '.epub')])
if inpath:
inpath = os.path.normpath(inpath)
self.inpath.delete(0, Tkconstants.END)
@@ -2276,8 +544,8 @@ def gui_main():
def get_outpath(self):
outpath = tkFileDialog.asksaveasfilename(
- parent=None, title=u"Select unencrypted PDF file to produce",
- defaultextension=u".pdf", filetypes=[('PDF files', '.pdf')])
+ parent=None, title=u"Select unencrypted ePub file to produce",
+ defaultextension=u".epub", filetypes=[('ePub files', '.epub')])
if outpath:
outpath = os.path.normpath(outpath)
self.outpath.delete(0, Tkconstants.END)
@@ -2305,30 +573,21 @@ def gui_main():
try:
decrypt_status = decryptBook(userkey, inpath, outpath)
except Exception, e:
- self.status['text'] = u"Error; {0}".format(e.args[0])
+ self.status['text'] = u"Error: {0}".format(e.args[0])
return
if decrypt_status == 0:
self.status['text'] = u"File successfully decrypted"
else:
self.status['text'] = u"The was an error decrypting the file."
-
root = Tkinter.Tk()
- if RSA is None:
- root.withdraw()
- tkMessageBox.showerror(
- "INEPT PDF",
- "This script requires OpenSSL or PyCrypto, which must be installed "
- "separately. Read the top-of-script comment for details.")
- return 1
- root.title(u"Adobe Adept PDF Decrypter v.{0}".format(__version__))
+ root.title(u"Adobe Adept ePub Decrypter v.{0}".format(__version__))
root.resizable(True, False)
- root.minsize(370, 0)
+ root.minsize(300, 0)
DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1)
root.mainloop()
return 0
-
if __name__ == '__main__':
if len(sys.argv) > 1:
sys.exit(cli_main())
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/ineptpdf.py b/DeDRM_calibre_plugin/DeDRM_plugin/ineptpdf.py
index 929ce57..1986e20 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/ineptpdf.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/ineptpdf.py
@@ -1,89 +1,74 @@
-#!/usr/bin/env python
+#! /usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import with_statement
-# ignobleepub.pyw, version 3.6
-# Copyright © 2009-2012 by DiapDealer et al.
-
-# engine to remove drm from Kindle for Mac and Kindle for PC books
-# for personal use for archiving and converting your ebooks
-
-# PLEASE DO NOT PIRATE EBOOKS!
-
-# We want all authors and publishers, and eBook stores to live
-# long and prosperous lives but at the same time we just want to
-# be able to read OUR books on whatever device we want and to keep
-# readable for a long, long time
-
-# This borrows very heavily from works by CMBDTC, IHeartCabbages, skindle,
-# unswindle, DarkReverser, ApprenticeAlf, DiapDealer, some_updates
-# and many many others
-# Special thanks to The Dark Reverser for MobiDeDrm and CMBDTC for cmbdtc_dump
-# from which this script borrows most unashamedly.
-
-
-# Changelog
-# 1.0 - Name change to k4mobidedrm. Adds Mac support, Adds plugin code
-# 1.1 - Adds support for additional kindle.info files
-# 1.2 - Better error handling for older Mobipocket
-# 1.3 - Don't try to decrypt Topaz books
-# 1.7 - Add support for Topaz books and Kindle serial numbers. Split code.
-# 1.9 - Tidy up after Topaz, minor exception changes
-# 2.1 - Topaz fix and filename sanitizing
-# 2.2 - Topaz Fix and minor Mac code fix
-# 2.3 - More Topaz fixes
-# 2.4 - K4PC/Mac key generation fix
-# 2.6 - Better handling of non-K4PC/Mac ebooks
-# 2.7 - Better trailing bytes handling in mobidedrm
-# 2.8 - Moved parsing of kindle.info files to mac & pc util files.
-# 3.1 - Updated for new calibre interface. Now __init__ in plugin.
-# 3.5 - Now support Kindle for PC/Mac 1.6
-# 3.6 - Even better trailing bytes handling in mobidedrm
-# 3.7 - Add support for Amazon Print Replica ebooks.
-# 3.8 - Improved Topaz support
-# 4.1 - Improved Topaz support and faster decryption with alfcrypto
-# 4.2 - Added support for Amazon's KF8 format ebooks
-# 4.4 - Linux calls to Wine added, and improved configuration dialog
-# 4.5 - Linux works again without Wine. Some Mac key file search changes
-# 4.6 - First attempt to handle unicode properly
-# 4.7 - Added timing reports, and changed search for Mac key files
-# 4.8 - Much better unicode handling, matching the updated inept and ignoble scripts
-# - Moved back into plugin, __init__ in plugin now only contains plugin code.
-# 4.9 - Missed some invalid characters in cleanup_name
-# 5.0 - Extraction of info from Kindle for PC/Mac moved into kindlekey.py
-# - tweaked GetDecryptedBook interface to leave passed parameters unchanged
-# 5.1 - moved unicode_argv call inside main for Windows DeDRM compatibility
-# 5.2 - Fixed error in command line processing of unicode arguments
-
-__version__ = '5.2'
-
-
-import sys, os, re
-import csv
-import getopt
-import re
-import traceback
-import time
-import htmlentitydefs
-import json
+# ineptpdf.pyw, version 7.11
+# Copyright © 2009-2010 by i♥cabbages
-class DrmException(Exception):
- pass
+# Released under the terms of the GNU General Public Licence, version 3
+# <http://www.gnu.org/licenses/>
+
+# Modified 2010–2012 by some_updates, DiapDealer and Apprentice Alf
+
+# Windows users: Before running this program, you must first install Python 2.6
+# from <http://www.python.org/download/> and PyCrypto from
+# <http://www.voidspace.org.uk/python/modules.shtml#pycrypto> (make sure to
+# install the version for Python 2.6). Save this script file as
+# ineptpdf.pyw and double-click on it to run it.
+#
+# Mac OS X users: Save this script file as ineptpdf.pyw. You can run this
+# program from the command line (pythonw ineptpdf.pyw) or by double-clicking
+# it when it has been associated with PythonLauncher.
-if 'calibre' in sys.modules:
- inCalibre = True
-else:
- inCalibre = False
+# Revision history:
+# 1 - Initial release
+# 2 - Improved determination of key-generation algorithm
+# 3 - Correctly handle PDF >=1.5 cross-reference streams
+# 4 - Removal of ciando's personal ID
+# 5 - Automated decryption of a complete directory
+# 6.1 - backward compatibility for 1.7.1 and old adeptkey.der
+# 7 - Get cross reference streams and object streams working for input.
+# Not yet supported on output but this only effects file size,
+# not functionality. (anon2)
+# 7.1 - Correct a problem when an old trailer is not followed by startxref
+# 7.2 - Correct malformed Mac OS resource forks for Stanza (anon2)
+# - Support for cross ref streams on output (decreases file size)
+# 7.3 - Correct bug in trailer with cross ref stream that caused the error
+# "The root object is missing or invalid" in Adobe Reader. (anon2)
+# 7.4 - Force all generation numbers in output file to be 0, like in v6.
+# Fallback code for wrong xref improved (search till last trailer
+# instead of first) (anon2)
+# 7.5 - allow support for OpenSSL to replace pycrypto on all platforms
+# implemented ARC4 interface to OpenSSL
+# fixed minor typos
+# 7.6 - backported AES and other fixes from version 8.4.48
+# 7.7 - On Windows try PyCrypto first and OpenSSL next
+# 7.8 - Modify interface to allow use of import
+# 7.9 - Bug fix for some session key errors when len(bookkey) > length required
+# 7.10 - Various tweaks to fix minor problems.
+# 7.11 - More tweaks to fix minor problems.
+# 7.12 - Revised to allow use in calibre plugins to eliminate need for duplicate code
+# 7.13 - Fixed erroneous mentions of ineptepub
+# 7.14 - moved unicode_argv call inside main for Windows DeDRM compatibility
+# 8.0 - Work if TkInter is missing
+# 8.0.1 - Broken Metadata fix.
-if inCalibre:
- from calibre_plugins.dedrm import mobidedrm
- from calibre_plugins.dedrm import topazextract
- from calibre_plugins.dedrm import kgenpids
-else:
- import mobidedrm
- import topazextract
- import kgenpids
+"""
+Decrypts Adobe ADEPT-encrypted PDF files.
+"""
+
+__license__ = 'GPL v3'
+__version__ = "8.0.1"
+
+import sys
+import os
+import re
+import zlib
+import struct
+import hashlib
+from itertools import chain, islice
+import xml.etree.ElementTree as etree
# Wrap a stream so that output gets flushed immediately
# and also make sure that any unicode strings get
@@ -134,192 +119,2217 @@ def unicode_argv():
start = argc.value - len(sys.argv)
return [argv[i] for i in
xrange(start, argc.value)]
- # if we don't have any arguments at all, just pass back script name
- # this should never happen
- return [u"mobidedrm.py"]
+ return [u"ineptpdf.py"]
else:
argvencoding = sys.stdin.encoding
if argvencoding == None:
argvencoding = "utf-8"
return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
-# cleanup unicode filenames
-# borrowed from calibre from calibre/src/calibre/__init__.py
-# added in removal of control (<32) chars
-# and removal of . at start and end
-# and with some (heavily edited) code from Paul Durrant's kindlenamer.py
-def cleanup_name(name):
- # substitute filename unfriendly characters
- name = name.replace(u"<",u"[").replace(u">",u"]").replace(u" : ",u" – ").replace(u": ",u" – ").replace(u":",u"—").replace(u"/",u"_").replace(u"\\",u"_").replace(u"|",u"_").replace(u"\"",u"\'").replace(u"*",u"_").replace(u"?",u"")
- # delete control characters
- name = u"".join(char for char in name if ord(char)>=32)
- # white space to single space, delete leading and trailing while space
- name = re.sub(ur"\s", u" ", name).strip()
- # remove leading dots
- while len(name)>0 and name[0] == u".":
- name = name[1:]
- # remove trailing dots (Windows doesn't like them)
- if name.endswith(u'.'):
- name = name[:-1]
- return name
-
-# must be passed unicode
-def unescape(text):
- def fixup(m):
- text = m.group(0)
- if text[:2] == u"&#":
- # character reference
- try:
- if text[:3] == u"&#x":
- return unichr(int(text[3:-1], 16))
+
+class ADEPTError(Exception):
+ pass
+
+
+import hashlib
+
+def SHA256(message):
+ ctx = hashlib.sha256()
+ ctx.update(message)
+ return ctx.digest()
+
+
+def _load_crypto_libcrypto():
+ from ctypes import CDLL, POINTER, c_void_p, c_char_p, c_int, c_long, \
+ Structure, c_ulong, create_string_buffer, cast
+ from ctypes.util import find_library
+
+ if sys.platform.startswith('win'):
+ libcrypto = find_library('libeay32')
+ else:
+ libcrypto = find_library('crypto')
+
+ if libcrypto is None:
+ raise ADEPTError('libcrypto not found')
+ libcrypto = CDLL(libcrypto)
+
+ AES_MAXNR = 14
+
+ RSA_NO_PADDING = 3
+
+ c_char_pp = POINTER(c_char_p)
+ c_int_p = POINTER(c_int)
+
+ class AES_KEY(Structure):
+ _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
+ AES_KEY_p = POINTER(AES_KEY)
+
+ class RC4_KEY(Structure):
+ _fields_ = [('x', c_int), ('y', c_int), ('box', c_int * 256)]
+ RC4_KEY_p = POINTER(RC4_KEY)
+
+ class RSA(Structure):
+ pass
+ RSA_p = POINTER(RSA)
+
+ def F(restype, name, argtypes):
+ func = getattr(libcrypto, name)
+ func.restype = restype
+ func.argtypes = argtypes
+ return func
+
+ AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])
+ AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])
+
+ RC4_set_key = F(None,'RC4_set_key',[RC4_KEY_p, c_int, c_char_p])
+ RC4_crypt = F(None,'RC4',[RC4_KEY_p, c_int, c_char_p, c_char_p])
+
+ d2i_RSAPrivateKey = F(RSA_p, 'd2i_RSAPrivateKey',
+ [RSA_p, c_char_pp, c_long])
+ RSA_size = F(c_int, 'RSA_size', [RSA_p])
+ RSA_private_decrypt = F(c_int, 'RSA_private_decrypt',
+ [c_int, c_char_p, c_char_p, RSA_p, c_int])
+ RSA_free = F(None, 'RSA_free', [RSA_p])
+
+ class RSA(object):
+ def __init__(self, der):
+ buf = create_string_buffer(der)
+ pp = c_char_pp(cast(buf, c_char_p))
+ rsa = self._rsa = d2i_RSAPrivateKey(None, pp, len(der))
+ if rsa is None:
+ raise ADEPTError('Error parsing ADEPT user key DER')
+
+ def decrypt(self, from_):
+ rsa = self._rsa
+ to = create_string_buffer(RSA_size(rsa))
+ dlen = RSA_private_decrypt(len(from_), from_, to, rsa,
+ RSA_NO_PADDING)
+ if dlen < 0:
+ raise ADEPTError('RSA decryption failed')
+ return to[1:dlen]
+
+ def __del__(self):
+ if self._rsa is not None:
+ RSA_free(self._rsa)
+ self._rsa = None
+
+ class ARC4(object):
+ @classmethod
+ def new(cls, userkey):
+ self = ARC4()
+ self._blocksize = len(userkey)
+ key = self._key = RC4_KEY()
+ RC4_set_key(key, self._blocksize, userkey)
+ return self
+ def __init__(self):
+ self._blocksize = 0
+ self._key = None
+ def decrypt(self, data):
+ out = create_string_buffer(len(data))
+ RC4_crypt(self._key, len(data), data, out)
+ return out.raw
+
+ class AES(object):
+ MODE_CBC = 0
+ @classmethod
+ def new(cls, userkey, mode, iv):
+ self = AES()
+ self._blocksize = len(userkey)
+ # mode is ignored since CBCMODE is only thing supported/used so far
+ self._mode = mode
+ if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
+ raise ADEPTError('AES improper key used')
+ return
+ keyctx = self._keyctx = AES_KEY()
+ self._iv = iv
+ rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
+ if rv < 0:
+ raise ADEPTError('Failed to initialize AES key')
+ return self
+ def __init__(self):
+ self._blocksize = 0
+ self._keyctx = None
+ self._iv = 0
+ self._mode = 0
+ def decrypt(self, data):
+ out = create_string_buffer(len(data))
+ rv = AES_cbc_encrypt(data, out, len(data), self._keyctx, self._iv, 0)
+ if rv == 0:
+ raise ADEPTError('AES decryption failed')
+ return out.raw
+
+ return (ARC4, RSA, AES)
+
+
+def _load_crypto_pycrypto():
+ from Crypto.PublicKey import RSA as _RSA
+ from Crypto.Cipher import ARC4 as _ARC4
+ from Crypto.Cipher import AES as _AES
+
+ # ASN.1 parsing code from tlslite
+ class ASN1Error(Exception):
+ pass
+
+ class ASN1Parser(object):
+ class Parser(object):
+ def __init__(self, bytes):
+ self.bytes = bytes
+ self.index = 0
+
+ def get(self, length):
+ if self.index + length > len(self.bytes):
+ raise ASN1Error("Error decoding ASN.1")
+ x = 0
+ for count in range(length):
+ x <<= 8
+ x |= self.bytes[self.index]
+ self.index += 1
+ return x
+
+ def getFixBytes(self, lengthBytes):
+ bytes = self.bytes[self.index : self.index+lengthBytes]
+ self.index += lengthBytes
+ return bytes
+
+ def getVarBytes(self, lengthLength):
+ lengthBytes = self.get(lengthLength)
+ return self.getFixBytes(lengthBytes)
+
+ def getFixList(self, length, lengthList):
+ l = [0] * lengthList
+ for x in range(lengthList):
+ l[x] = self.get(length)
+ return l
+
+ def getVarList(self, length, lengthLength):
+ lengthList = self.get(lengthLength)
+ if lengthList % length != 0:
+ raise ASN1Error("Error decoding ASN.1")
+ lengthList = int(lengthList/length)
+ l = [0] * lengthList
+ for x in range(lengthList):
+ l[x] = self.get(length)
+ return l
+
+ def startLengthCheck(self, lengthLength):
+ self.lengthCheck = self.get(lengthLength)
+ self.indexCheck = self.index
+
+ def setLengthCheck(self, length):
+ self.lengthCheck = length
+ self.indexCheck = self.index
+
+ def stopLengthCheck(self):
+ if (self.index - self.indexCheck) != self.lengthCheck:
+ raise ASN1Error("Error decoding ASN.1")
+
+ def atLengthCheck(self):
+ if (self.index - self.indexCheck) < self.lengthCheck:
+ return False
+ elif (self.index - self.indexCheck) == self.lengthCheck:
+ return True
else:
- return unichr(int(text[2:-1]))
+ raise ASN1Error("Error decoding ASN.1")
+
+ def __init__(self, bytes):
+ p = self.Parser(bytes)
+ p.get(1)
+ self.length = self._getASN1Length(p)
+ self.value = p.getFixBytes(self.length)
+
+ def getChild(self, which):
+ p = self.Parser(self.value)
+ for x in range(which+1):
+ markIndex = p.index
+ p.get(1)
+ length = self._getASN1Length(p)
+ p.getFixBytes(length)
+ return ASN1Parser(p.bytes[markIndex:p.index])
+
+ def _getASN1Length(self, p):
+ firstLength = p.get(1)
+ if firstLength<=127:
+ return firstLength
+ else:
+ lengthLength = firstLength & 0x7F
+ return p.get(lengthLength)
+
+ class ARC4(object):
+ @classmethod
+ def new(cls, userkey):
+ self = ARC4()
+ self._arc4 = _ARC4.new(userkey)
+ return self
+ def __init__(self):
+ self._arc4 = None
+ def decrypt(self, data):
+ return self._arc4.decrypt(data)
+
+ class AES(object):
+ MODE_CBC = _AES.MODE_CBC
+ @classmethod
+ def new(cls, userkey, mode, iv):
+ self = AES()
+ self._aes = _AES.new(userkey, mode, iv)
+ return self
+ def __init__(self):
+ self._aes = None
+ def decrypt(self, data):
+ return self._aes.decrypt(data)
+
+ class RSA(object):
+ def __init__(self, der):
+ key = ASN1Parser([ord(x) for x in der])
+ key = [key.getChild(x).value for x in xrange(1, 4)]
+ key = [self.bytesToNumber(v) for v in key]
+ self._rsa = _RSA.construct(key)
+
+ def bytesToNumber(self, bytes):
+ total = 0L
+ for byte in bytes:
+ total = (total << 8) + byte
+ return total
+
+ def decrypt(self, data):
+ return self._rsa.decrypt(data)
+
+ return (ARC4, RSA, AES)
+
+def _load_crypto():
+ ARC4 = RSA = AES = None
+ cryptolist = (_load_crypto_libcrypto, _load_crypto_pycrypto)
+ if sys.platform.startswith('win'):
+ cryptolist = (_load_crypto_pycrypto, _load_crypto_libcrypto)
+ for loader in cryptolist:
+ try:
+ ARC4, RSA, AES = loader()
+ break
+ except (ImportError, ADEPTError):
+ pass
+ return (ARC4, RSA, AES)
+ARC4, RSA, AES = _load_crypto()
+
+
+try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
+
+
+# Do we generate cross reference streams on output?
+# 0 = never
+# 1 = only if present in input
+# 2 = always
+
+GEN_XREF_STM = 1
+
+# This is the value for the current document
+gen_xref_stm = False # will be set in PDFSerializer
+
+# PDF parsing routines from pdfminer, with changes for EBX_HANDLER
+
+# Utilities
+
+def choplist(n, seq):
+ '''Groups every n elements of the list.'''
+ r = []
+ for x in seq:
+ r.append(x)
+ if len(r) == n:
+ yield tuple(r)
+ r = []
+ return
+
+def nunpack(s, default=0):
+ '''Unpacks up to 4 bytes big endian.'''
+ l = len(s)
+ if not l:
+ return default
+ elif l == 1:
+ return ord(s)
+ elif l == 2:
+ return struct.unpack('>H', s)[0]
+ elif l == 3:
+ return struct.unpack('>L', '\x00'+s)[0]
+ elif l == 4:
+ return struct.unpack('>L', s)[0]
+ else:
+ return TypeError('invalid length: %d' % l)
+
+
+STRICT = 0
+
+
+# PS Exceptions
+
+class PSException(Exception): pass
+class PSEOF(PSException): pass
+class PSSyntaxError(PSException): pass
+class PSTypeError(PSException): pass
+class PSValueError(PSException): pass
+
+
+# Basic PostScript Types
+
+
+# PSLiteral
+class PSObject(object): pass
+
+class PSLiteral(PSObject):
+ '''
+ PS literals (e.g. "/Name").
+ Caution: Never create these objects directly.
+ Use PSLiteralTable.intern() instead.
+ '''
+ def __init__(self, name):
+ self.name = name
+ return
+
+ def __repr__(self):
+ name = []
+ for char in self.name:
+ if not char.isalnum():
+ char = '#%02x' % ord(char)
+ name.append(char)
+ return '/%s' % ''.join(name)
+
+# PSKeyword
+class PSKeyword(PSObject):
+ '''
+ PS keywords (e.g. "showpage").
+ Caution: Never create these objects directly.
+ Use PSKeywordTable.intern() instead.
+ '''
+ def __init__(self, name):
+ self.name = name
+ return
+
+ def __repr__(self):
+ return self.name
+
+# PSSymbolTable
+class PSSymbolTable(object):
+
+ '''
+ Symbol table that stores PSLiteral or PSKeyword.
+ '''
+
+ def __init__(self, classe):
+ self.dic = {}
+ self.classe = classe
+ return
+
+ def intern(self, name):
+ if name in self.dic:
+ lit = self.dic[name]
+ else:
+ lit = self.classe(name)
+ self.dic[name] = lit
+ return lit
+
+PSLiteralTable = PSSymbolTable(PSLiteral)
+PSKeywordTable = PSSymbolTable(PSKeyword)
+LIT = PSLiteralTable.intern
+KWD = PSKeywordTable.intern
+KEYWORD_BRACE_BEGIN = KWD('{')
+KEYWORD_BRACE_END = KWD('}')
+KEYWORD_ARRAY_BEGIN = KWD('[')
+KEYWORD_ARRAY_END = KWD(']')
+KEYWORD_DICT_BEGIN = KWD('<<')
+KEYWORD_DICT_END = KWD('>>')
+
+
+def literal_name(x):
+ if not isinstance(x, PSLiteral):
+ if STRICT:
+ raise PSTypeError('Literal required: %r' % x)
+ else:
+ return str(x)
+ return x.name
+
+def keyword_name(x):
+ if not isinstance(x, PSKeyword):
+ if STRICT:
+ raise PSTypeError('Keyword required: %r' % x)
+ else:
+ return str(x)
+ return x.name
+
+
+## PSBaseParser
+##
+EOL = re.compile(r'[\r\n]')
+SPC = re.compile(r'\s')
+NONSPC = re.compile(r'\S')
+HEX = re.compile(r'[0-9a-fA-F]')
+END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]')
+END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]')
+HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.')
+END_NUMBER = re.compile(r'[^0-9]')
+END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]')
+END_STRING = re.compile(r'[()\134]')
+OCT_STRING = re.compile(r'[0-7]')
+ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
+
+class PSBaseParser(object):
+
+ '''
+ Most basic PostScript parser that performs only basic tokenization.
+ '''
+ BUFSIZ = 4096
+
+ def __init__(self, fp):
+ self.fp = fp
+ self.seek(0)
+ return
+
+ def __repr__(self):
+ return '<PSBaseParser: %r, bufpos=%d>' % (self.fp, self.bufpos)
+
+ def flush(self):
+ return
+
+ def close(self):
+ self.flush()
+ return
+
+ def tell(self):
+ return self.bufpos+self.charpos
+
+ def poll(self, pos=None, n=80):
+ pos0 = self.fp.tell()
+ if not pos:
+ pos = self.bufpos+self.charpos
+ self.fp.seek(pos)
+ ##print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n))
+ self.fp.seek(pos0)
+ return
+
+ def seek(self, pos):
+ '''
+ Seeks the parser to the given position.
+ '''
+ self.fp.seek(pos)
+ # reset the status for nextline()
+ self.bufpos = pos
+ self.buf = ''
+ self.charpos = 0
+ # reset the status for nexttoken()
+ self.parse1 = self.parse_main
+ self.tokens = []
+ return
+
+ def fillbuf(self):
+ if self.charpos < len(self.buf): return
+ # fetch next chunk.
+ self.bufpos = self.fp.tell()
+ self.buf = self.fp.read(self.BUFSIZ)
+ if not self.buf:
+ raise PSEOF('Unexpected EOF')
+ self.charpos = 0
+ return
+
+ def parse_main(self, s, i):
+ m = NONSPC.search(s, i)
+ if not m:
+ return (self.parse_main, len(s))
+ j = m.start(0)
+ c = s[j]
+ self.tokenstart = self.bufpos+j
+ if c == '%':
+ self.token = '%'
+ return (self.parse_comment, j+1)
+ if c == '/':
+ self.token = ''
+ return (self.parse_literal, j+1)
+ if c in '-+' or c.isdigit():
+ self.token = c
+ return (self.parse_number, j+1)
+ if c == '.':
+ self.token = c
+ return (self.parse_float, j+1)
+ if c.isalpha():
+ self.token = c
+ return (self.parse_keyword, j+1)
+ if c == '(':
+ self.token = ''
+ self.paren = 1
+ return (self.parse_string, j+1)
+ if c == '<':
+ self.token = ''
+ return (self.parse_wopen, j+1)
+ if c == '>':
+ self.token = ''
+ return (self.parse_wclose, j+1)
+ self.add_token(KWD(c))
+ return (self.parse_main, j+1)
+
+ def add_token(self, obj):
+ self.tokens.append((self.tokenstart, obj))
+ return
+
+ def parse_comment(self, s, i):
+ m = EOL.search(s, i)
+ if not m:
+ self.token += s[i:]
+ return (self.parse_comment, len(s))
+ j = m.start(0)
+ self.token += s[i:j]
+ # We ignore comments.
+ #self.tokens.append(self.token)
+ return (self.parse_main, j)
+
+ def parse_literal(self, s, i):
+ m = END_LITERAL.search(s, i)
+ if not m:
+ self.token += s[i:]
+ return (self.parse_literal, len(s))
+ j = m.start(0)
+ self.token += s[i:j]
+ c = s[j]
+ if c == '#':
+ self.hex = ''
+ return (self.parse_literal_hex, j+1)
+ self.add_token(LIT(self.token))
+ return (self.parse_main, j)
+
+ def parse_literal_hex(self, s, i):
+ c = s[i]
+ if HEX.match(c) and len(self.hex) < 2:
+ self.hex += c
+ return (self.parse_literal_hex, i+1)
+ if self.hex:
+ self.token += chr(int(self.hex, 16))
+ return (self.parse_literal, i)
+
+ def parse_number(self, s, i):
+ m = END_NUMBER.search(s, i)
+ if not m:
+ self.token += s[i:]
+ return (self.parse_number, len(s))
+ j = m.start(0)
+ self.token += s[i:j]
+ c = s[j]
+ if c == '.':
+ self.token += c
+ return (self.parse_float, j+1)
+ try:
+ self.add_token(int(self.token))
+ except ValueError:
+ pass
+ return (self.parse_main, j)
+ def parse_float(self, s, i):
+ m = END_NUMBER.search(s, i)
+ if not m:
+ self.token += s[i:]
+ return (self.parse_float, len(s))
+ j = m.start(0)
+ self.token += s[i:j]
+ self.add_token(float(self.token))
+ return (self.parse_main, j)
+
+ def parse_keyword(self, s, i):
+ m = END_KEYWORD.search(s, i)
+ if not m:
+ self.token += s[i:]
+ return (self.parse_keyword, len(s))
+ j = m.start(0)
+ self.token += s[i:j]
+ if self.token == 'true':
+ token = True
+ elif self.token == 'false':
+ token = False
+ else:
+ token = KWD(self.token)
+ self.add_token(token)
+ return (self.parse_main, j)
+
+ def parse_string(self, s, i):
+ m = END_STRING.search(s, i)
+ if not m:
+ self.token += s[i:]
+ return (self.parse_string, len(s))
+ j = m.start(0)
+ self.token += s[i:j]
+ c = s[j]
+ if c == '\\':
+ self.oct = ''
+ return (self.parse_string_1, j+1)
+ if c == '(':
+ self.paren += 1
+ self.token += c
+ return (self.parse_string, j+1)
+ if c == ')':
+ self.paren -= 1
+ if self.paren:
+ self.token += c
+ return (self.parse_string, j+1)
+ self.add_token(self.token)
+ return (self.parse_main, j+1)
+ def parse_string_1(self, s, i):
+ c = s[i]
+ if OCT_STRING.match(c) and len(self.oct) < 3:
+ self.oct += c
+ return (self.parse_string_1, i+1)
+ if self.oct:
+ self.token += chr(int(self.oct, 8))
+ return (self.parse_string, i)
+ if c in ESC_STRING:
+ self.token += chr(ESC_STRING[c])
+ return (self.parse_string, i+1)
+
+ def parse_wopen(self, s, i):
+ c = s[i]
+ if c.isspace() or HEX.match(c):
+ return (self.parse_hexstring, i)
+ if c == '<':
+ self.add_token(KEYWORD_DICT_BEGIN)
+ i += 1
+ return (self.parse_main, i)
+
+ def parse_wclose(self, s, i):
+ c = s[i]
+ if c == '>':
+ self.add_token(KEYWORD_DICT_END)
+ i += 1
+ return (self.parse_main, i)
+
+ def parse_hexstring(self, s, i):
+ m = END_HEX_STRING.search(s, i)
+ if not m:
+ self.token += s[i:]
+ return (self.parse_hexstring, len(s))
+ j = m.start(0)
+ self.token += s[i:j]
+ token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)),
+ SPC.sub('', self.token))
+ self.add_token(token)
+ return (self.parse_main, j)
+
+ def nexttoken(self):
+ while not self.tokens:
+ self.fillbuf()
+ (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos)
+ token = self.tokens.pop(0)
+ return token
+
+ def nextline(self):
+ '''
+ Fetches a next line that ends either with \\r or \\n.
+ '''
+ linebuf = ''
+ linepos = self.bufpos + self.charpos
+ eol = False
+ while 1:
+ self.fillbuf()
+ if eol:
+ c = self.buf[self.charpos]
+ # handle '\r\n'
+ if c == '\n':
+ linebuf += c
+ self.charpos += 1
+ break
+ m = EOL.search(self.buf, self.charpos)
+ if m:
+ linebuf += self.buf[self.charpos:m.end(0)]
+ self.charpos = m.end(0)
+ if linebuf[-1] == '\r':
+ eol = True
+ else:
+ break
+ else:
+ linebuf += self.buf[self.charpos:]
+ self.charpos = len(self.buf)
+ return (linepos, linebuf)
+
+ def revreadlines(self):
+ '''
+ Fetches a next line backword. This is used to locate
+ the trailers at the end of a file.
+ '''
+ self.fp.seek(0, 2)
+ pos = self.fp.tell()
+ buf = ''
+ while 0 < pos:
+ prevpos = pos
+ pos = max(0, pos-self.BUFSIZ)
+ self.fp.seek(pos)
+ s = self.fp.read(prevpos-pos)
+ if not s: break
+ while 1:
+ n = max(s.rfind('\r'), s.rfind('\n'))
+ if n == -1:
+ buf = s + buf
+ break
+ yield s[n:]+buf
+ s = s[:n]
+ buf = ''
+ return
+
+
+## PSStackParser
+##
+class PSStackParser(PSBaseParser):
+
+ def __init__(self, fp):
+ PSBaseParser.__init__(self, fp)
+ self.reset()
+ return
+
+ def reset(self):
+ self.context = []
+ self.curtype = None
+ self.curstack = []
+ self.results = []
+ return
+
+ def seek(self, pos):
+ PSBaseParser.seek(self, pos)
+ self.reset()
+ return
+
+ def push(self, *objs):
+ self.curstack.extend(objs)
+ return
+ def pop(self, n):
+ objs = self.curstack[-n:]
+ self.curstack[-n:] = []
+ return objs
+ def popall(self):
+ objs = self.curstack
+ self.curstack = []
+ return objs
+ def add_results(self, *objs):
+ self.results.extend(objs)
+ return
+
+ def start_type(self, pos, type):
+ self.context.append((pos, self.curtype, self.curstack))
+ (self.curtype, self.curstack) = (type, [])
+ return
+ def end_type(self, type):
+ if self.curtype != type:
+ raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
+ objs = [ obj for (_,obj) in self.curstack ]
+ (pos, self.curtype, self.curstack) = self.context.pop()
+ return (pos, objs)
+
+ def do_keyword(self, pos, token):
+ return
+
+ def nextobject(self, direct=False):
+ '''
+ Yields a list of objects: keywords, literals, strings,
+ numbers, arrays and dictionaries. Arrays and dictionaries
+ are represented as Python sequence and dictionaries.
+ '''
+ while not self.results:
+ (pos, token) = self.nexttoken()
+ ##print (pos,token), (self.curtype, self.curstack)
+ if (isinstance(token, int) or
+ isinstance(token, float) or
+ isinstance(token, bool) or
+ isinstance(token, str) or
+ isinstance(token, PSLiteral)):
+ # normal token
+ self.push((pos, token))
+ elif token == KEYWORD_ARRAY_BEGIN:
+ # begin array
+ self.start_type(pos, 'a')
+ elif token == KEYWORD_ARRAY_END:
+ # end array
+ try:
+ self.push(self.end_type('a'))
+ except PSTypeError:
+ if STRICT: raise
+ elif token == KEYWORD_DICT_BEGIN:
+ # begin dictionary
+ self.start_type(pos, 'd')
+ elif token == KEYWORD_DICT_END:
+ # end dictionary
+ try:
+ (pos, objs) = self.end_type('d')
+ if len(objs) % 2 != 0:
+ print "Incomplete dictionary construct"
+ objs.append("") # this isn't necessary.
+ # temporary fix. is this due to rental books?
+ # raise PSSyntaxError(
+ # 'Invalid dictionary construct: %r' % objs)
+ d = dict((literal_name(k), v) \
+ for (k,v) in choplist(2, objs))
+ self.push((pos, d))
+ except PSTypeError:
+ if STRICT: raise
+ else:
+ self.do_keyword(pos, token)
+ if self.context:
+ continue
+ else:
+ if direct:
+ return self.pop(1)[0]
+ self.flush()
+ obj = self.results.pop(0)
+ return obj
+
+
+LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
+LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl'))
+LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW'))
+LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85'))
+
+
+## PDF Objects
+##
+class PDFObject(PSObject): pass
+
+class PDFException(PSException): pass
+class PDFTypeError(PDFException): pass
+class PDFValueError(PDFException): pass
+class PDFNotImplementedError(PSException): pass
+
+
+## PDFObjRef
+##
+class PDFObjRef(PDFObject):
+
+ def __init__(self, doc, objid, genno):
+ if objid == 0:
+ if STRICT:
+ raise PDFValueError('PDF object id cannot be 0.')
+ self.doc = doc
+ self.objid = objid
+ self.genno = genno
+ return
+
+ def __repr__(self):
+ return '<PDFObjRef:%d %d>' % (self.objid, self.genno)
+
+ def resolve(self):
+ return self.doc.getobj(self.objid)
+
+
+# resolve
+def resolve1(x):
+ '''
+ Resolve an object. If this is an array or dictionary,
+ it may still contains some indirect objects inside.
+ '''
+ while isinstance(x, PDFObjRef):
+ x = x.resolve()
+ return x
+
+def resolve_all(x):
+ '''
+ Recursively resolve X and all the internals.
+ Make sure there is no indirect reference within the nested object.
+ This procedure might be slow.
+ '''
+ while isinstance(x, PDFObjRef):
+ x = x.resolve()
+ if isinstance(x, list):
+ x = [ resolve_all(v) for v in x ]
+ elif isinstance(x, dict):
+ for (k,v) in x.iteritems():
+ x[k] = resolve_all(v)
+ return x
+
+def decipher_all(decipher, objid, genno, x):
+ '''
+ Recursively decipher X.
+ '''
+ if isinstance(x, str):
+ return decipher(objid, genno, x)
+ decf = lambda v: decipher_all(decipher, objid, genno, v)
+ if isinstance(x, list):
+ x = [decf(v) for v in x]
+ elif isinstance(x, dict):
+ x = dict((k, decf(v)) for (k, v) in x.iteritems())
+ return x
+
+
+# Type checking helpers: each resolves indirect references first, then
+# either raises PDFTypeError (STRICT mode) or silently substitutes a
+# neutral default of the expected type.
+def int_value(x):
+    x = resolve1(x)
+    if not isinstance(x, int):
+        if STRICT:
+            raise PDFTypeError('Integer required: %r' % x)
+        return 0
+    return x
+
+def float_value(x):
+    x = resolve1(x)
+    if not isinstance(x, float):
+        if STRICT:
+            raise PDFTypeError('Float required: %r' % x)
+        return 0.0
+    return x
+
+def num_value(x):
+    x = resolve1(x)
+    if not (isinstance(x, int) or isinstance(x, float)):
+        if STRICT:
+            raise PDFTypeError('Int or Float required: %r' % x)
+        return 0
+    return x
+
+def str_value(x):
+    x = resolve1(x)
+    if not isinstance(x, str):
+        if STRICT:
+            raise PDFTypeError('String required: %r' % x)
+        return ''
+    return x
+
+def list_value(x):
+    x = resolve1(x)
+    if not (isinstance(x, list) or isinstance(x, tuple)):
+        if STRICT:
+            raise PDFTypeError('List required: %r' % x)
+        return []
+    return x
+
+def dict_value(x):
+    x = resolve1(x)
+    if not isinstance(x, dict):
+        if STRICT:
+            raise PDFTypeError('Dict required: %r' % x)
+        return {}
+    return x
+
+def stream_value(x):
+    x = resolve1(x)
+    if not isinstance(x, PDFStream):
+        if STRICT:
+            raise PDFTypeError('PDFStream required: %r' % x)
+        # Empty placeholder stream as the non-strict fallback.
+        return PDFStream({}, '')
+    return x
+
+# ascii85decode(data)
+def ascii85decode(data):
+    # Minimal ASCII85 (Adobe variant) decoder: each group of 5 chars in
+    # '!'..'u' encodes 4 bytes base-85; 'z' is shorthand for four zero
+    # bytes; '~' (start of the '~>' EOD marker) terminates the input.
+    n = b = 0
+    out = ''
+    for c in data:
+        if '!' <= c and c <= 'u':
+            n += 1
+            b = b*85+(ord(c)-33)
+            if n == 5:
+                out += struct.pack('>L',b)
+                n = b = 0
+        elif c == 'z':
+            # 'z' may only appear on a group boundary.
+            assert n == 0
+            out += '\0\0\0\0'
+        elif c == '~':
+            if n:
+                # Pad the final partial group with 'u' (84) and keep only
+                # the n-1 meaningful bytes.
+                for _ in range(5-n):
+                    b = b*85+84
+                out += struct.pack('>L',b)[:n-1]
+            break
+    return out
+
+
+## PDFStream type
+class PDFStream(PDFObject):
+    # A stream object: a dictionary (self.dic) plus a raw byte payload.
+    # Decryption and /Filter decoding are deferred until get_data().
+    def __init__(self, dic, rawdata, decipher=None):
+        length = int_value(dic.get('Length', 0))
+        eol = rawdata[length:]
+        # quick and dirty fix for false length attribute,
+        # might not work if the pdf stream parser has a problem
+        if decipher != None and decipher.__name__ == 'decrypt_aes':
+            # AES-CBC payloads must be a whole number of 16-byte blocks;
+            # drop any trailing bytes the parser over-read.
+            if (len(rawdata) % 16) != 0:
+                cutdiv = len(rawdata) // 16
+                rawdata = rawdata[:16*cutdiv]
+        else:
+            # Only trust /Length when exactly one EOL remains past it.
+            if eol in ('\r', '\n', '\r\n'):
+                rawdata = rawdata[:length]
+
+        self.dic = dic
+        self.rawdata = rawdata
+        self.decipher = decipher
+        self.data = None     # cache: fully decoded (post-filter) data
+        self.decdata = None  # cache: decrypted but still filter-encoded data
+        self.objid = None
+        self.genno = None
+        return
+
+    def set_objid(self, objid, genno):
+        # Called by the parser once the enclosing "N G obj" is known;
+        # needed because the decryption key depends on objid/genno.
+        self.objid = objid
+        self.genno = genno
+        return
+
+    def __repr__(self):
+        if self.rawdata:
+            return '<PDFStream(%r): raw=%d, %r>' % \
+                   (self.objid, len(self.rawdata), self.dic)
+        else:
+            return '<PDFStream(%r): data=%d, %r>' % \
+                   (self.objid, len(self.data), self.dic)
+
+    def decode(self):
+        # Decrypt (if needed) then run the /Filter chain, populating
+        # self.data and releasing self.rawdata. One-shot: may only be
+        # called while still undecoded.
+        assert self.data is None and self.rawdata is not None
+        data = self.rawdata
+        if self.decipher:
+            # Handle encryption
+            data = self.decipher(self.objid, self.genno, data)
+        if gen_xref_stm:
+            self.decdata = data # keep decrypted data
+        if 'Filter' not in self.dic:
+            self.data = data
+            self.rawdata = None
+            ##print self.dict
+            return
+        filters = self.dic['Filter']
+        if not isinstance(filters, list):
+            filters = [ filters ]
+        for f in filters:
+            if f in LITERALS_FLATE_DECODE:
+                # will get errors if the document is encrypted.
+                data = zlib.decompress(data)
+            elif f in LITERALS_LZW_DECODE:
+                data = ''.join(LZWDecoder(StringIO(data)).run())
+            elif f in LITERALS_ASCII85_DECODE:
+                data = ascii85decode(data)
+            elif f == LITERAL_CRYPT:
+                raise PDFNotImplementedError('/Crypt filter is unsupported')
+            else:
+                raise PDFNotImplementedError('Unsupported filter: %r' % f)
+            # apply predictors
+            if 'DP' in self.dic:
+                params = self.dic['DP']
+            else:
+                params = self.dic.get('DecodeParms', {})
+            if 'Predictor' in params:
+                pred = int_value(params['Predictor'])
+                if pred:
+                    # Only PNG "Up" prediction (12) is implemented: each
+                    # row is the byte-wise sum of itself and the prior row.
+                    if pred != 12:
+                        raise PDFNotImplementedError(
+                            'Unsupported predictor: %r' % pred)
+                    if 'Columns' not in params:
+                        raise PDFValueError(
+                            'Columns undefined for predictor=12')
+                    columns = int_value(params['Columns'])
+                    buf = ''
+                    ent0 = '\x00' * columns
+                    for i in xrange(0, len(data), columns+1):
+                        pred = data[i]
+                        ent1 = data[i+1:i+1+columns]
+                        if pred == '\x02':
+                            ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \
+                                           for (a,b) in zip(ent0,ent1))
+                        buf += ent1
+                        ent0 = ent1
+                    data = buf
+        self.data = data
+        self.rawdata = None
+        return
+
+    def get_data(self):
+        # Lazily decode on first access.
+        if self.data is None:
+            self.decode()
+        return self.data
+
+    def get_rawdata(self):
+        return self.rawdata
+
+    def get_decdata(self):
+        # Decrypted-but-not-defiltered bytes, for re-serialization.
+        if self.decdata is not None:
+            return self.decdata
+        data = self.rawdata
+        if self.decipher and data:
+            # Handle encryption
+            data = self.decipher(self.objid, self.genno, data)
+        return data
+
+
+## PDF Exceptions
+##
+# Parsing / encryption error subtypes used by the document machinery below.
+class PDFSyntaxError(PDFException): pass
+class PDFNoValidXRef(PDFSyntaxError): pass
+class PDFEncryptionError(PDFException): pass
+class PDFPasswordIncorrect(PDFEncryptionError): pass
+
+# some predefined literals and keywords.
+LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm')
+LITERAL_XREF = PSLiteralTable.intern('XRef')
+LITERAL_PAGE = PSLiteralTable.intern('Page')
+LITERAL_PAGES = PSLiteralTable.intern('Pages')
+LITERAL_CATALOG = PSLiteralTable.intern('Catalog')
+
+
+## XRefs
+##
+
+## PDFXRef
+##
+class PDFXRef(object):
+    # A classic (pre-PDF-1.5) textual cross-reference table:
+    # maps objid -> (genno, file offset) plus the trailing trailer dict.
+
+    def __init__(self):
+        self.offsets = None
+        return
+
+    def __repr__(self):
+        return '<PDFXRef: objs=%d>' % len(self.offsets)
+
+    def objids(self):
+        return self.offsets.iterkeys()
+
+    def load(self, parser):
+        # Parse "start count" section headers followed by count lines of
+        # "offset genno n|f", stopping at the 'trailer' keyword.
+        self.offsets = {}
+        while 1:
+            try:
+                (pos, line) = parser.nextline()
+            except PSEOF:
+                raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
+            if not line:
+                raise PDFNoValidXRef('Premature eof: %r' % parser)
+            if line.startswith('trailer'):
+                parser.seek(pos)
+                break
+            f = line.strip().split(' ')
+            if len(f) != 2:
+                raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
+            try:
+                (start, nobjs) = map(int, f)
        except ValueError:
-            pass
+                raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
+            for objid in xrange(start, start+nobjs):
+                try:
+                    (_, line) = parser.nextline()
+                except PSEOF:
+                    raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
+                f = line.strip().split(' ')
+                if len(f) != 3:
+                    raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
+                (pos, genno, use) = f
+                # Only in-use ('n') entries are recorded; free entries skipped.
+                if use != 'n': continue
+                self.offsets[objid] = (int(genno), int(pos))
+        self.load_trailer(parser)
+        return
+
+    KEYWORD_TRAILER = PSKeywordTable.intern('trailer')
+    def load_trailer(self, parser):
+        try:
+            (_,kwd) = parser.nexttoken()
+            assert kwd is self.KEYWORD_TRAILER
+            (_,dic) = parser.nextobject(direct=True)
+        except PSEOF:
+            # EOF mid-trailer: salvage whatever dict is on the stack.
+            x = parser.pop(1)
+            if not x:
+                raise PDFNoValidXRef('Unexpected EOF - file corrupted')
+            (_,dic) = x[0]
+        self.trailer = dict_value(dic)
+        return
+
+    def getpos(self, objid):
+        # Returns (None, offset); the None mirrors PDFXRefStream.getpos,
+        # where the first slot is a containing-stream id.
+        # NOTE(review): the try/except KeyError: raise is a no-op wrapper.
+        try:
+            (genno, pos) = self.offsets[objid]
+        except KeyError:
+            raise
+        return (None, pos)
+
+
+## PDFXRefStream
+##
+class PDFXRefStream(object):
+    # A PDF-1.5 cross-reference *stream*: binary entries of fl1+fl2+fl3
+    # bytes (field widths from /W), covering the (first, size) ranges
+    # listed in /Index.
+
+    def __init__(self):
+        self.index = None
+        self.data = None
+        self.entlen = None
+        self.fl1 = self.fl2 = self.fl3 = None
+        return
+
+    def __repr__(self):
+        return '<PDFXRef: objids=%s>' % self.index
+
+    def objids(self):
+        for first, size in self.index:
+            for objid in xrange(first, first + size):
+                yield objid
+
+    def load(self, parser, debug=0):
+        (_,objid) = parser.nexttoken() # ignored
+        (_,genno) = parser.nexttoken() # ignored
+        (_,kwd) = parser.nexttoken()
+        (_,stream) = parser.nextobject()
+        if not isinstance(stream, PDFStream) or \
+           stream.dic['Type'] is not LITERAL_XREF:
+            raise PDFNoValidXRef('Invalid PDF stream spec.')
+        size = stream.dic['Size']
+        # /Index defaults to the full range [0, Size).
+        index = stream.dic.get('Index', (0,size))
+        # Pair up (first, size) from the flat /Index array (Python 2 zip).
+        self.index = zip(islice(index, 0, None, 2),
+                         islice(index, 1, None, 2))
+        (self.fl1, self.fl2, self.fl3) = stream.dic['W']
+        self.data = stream.get_data()
+        self.entlen = self.fl1+self.fl2+self.fl3
+        self.trailer = stream.dic
+        return
+
+    def getpos(self, objid):
+        # Locate objid's entry. Type 1 entries are ordinary objects
+        # (returns (None, offset)); type 2 are objects stored inside an
+        # object stream (returns (stream objid, index within stream)).
+        offset = 0
+        for first, size in self.index:
+            if first <= objid and objid < (first + size):
+                break
+            offset += size
+        else:
+            raise KeyError(objid)
+        i = self.entlen * ((objid - first) + offset)
+        ent = self.data[i:i+self.entlen]
+        f1 = nunpack(ent[:self.fl1], 1)
+        if f1 == 1:
+            pos = nunpack(ent[self.fl1:self.fl1+self.fl2])
+            genno = nunpack(ent[self.fl1+self.fl2:])
+            return (None, pos)
+        elif f1 == 2:
+            objid = nunpack(ent[self.fl1:self.fl1+self.fl2])
+            index = nunpack(ent[self.fl1+self.fl2:])
+            return (objid, index)
+        # this is a free object
+        raise KeyError(objid)
+
+
+## PDFDocument
+##
+## A PDFDocument object represents a PDF document.
+## Since a PDF file is usually pretty big, normally it is not loaded
+## at once. Rather it is parsed dynamically as processing goes.
+## A PDF parser is associated with the document.
+##
+class PDFDocument(object):
+
+    def __init__(self):
+        # Caches and state; the document is unusable until set_parser()
+        # and initialize() have both run.
+        self.xrefs = []
+        self.objs = {}         # objid -> resolved object cache
+        self.parsed_objs = {}  # object-stream objid -> parsed contents
+        self.root = None
+        self.catalog = None
+        self.parser = None
+        self.encryption = None # (docid, encrypt-dict) or None
+        self.decipher = None   # callable(objid, genno, data) or None
+        return
+
+    # set_parser(parser)
+    # Associates the document with an (already initialized) parser object.
+    def set_parser(self, parser):
+        # Idempotent: only the first parser is kept.
+        if self.parser: return
+        self.parser = parser
+        # The document is set to be temporarily ready during collecting
+        # all the basic information about the document, e.g.
+        # the header, the encryption information, and the access rights
+        # for the document.
+        self.ready = True
+        # Retrieve the information of each header that was appended
+        # (maybe multiple times) at the end of the document.
+        self.xrefs = parser.read_xref()
+        for xref in self.xrefs:
+            trailer = xref.trailer
+            if not trailer: continue
+
+            # If there's an encryption info, remember it.
+            if 'Encrypt' in trailer:
+                #assert not self.encryption
+                try:
+                    self.encryption = (list_value(trailer['ID']),
+                                       dict_value(trailer['Encrypt']))
+                # fix for bad files
+                except:
+                    # Missing/corrupt /ID: substitute a dummy document id.
+                    self.encryption = ('ffffffffffffffffffffffffffffffffffff',
+                                       dict_value(trailer['Encrypt']))
+            if 'Root' in trailer:
+                self.set_root(dict_value(trailer['Root']))
+                break
+        else:
+            raise PDFSyntaxError('No /Root object! - Is this really a PDF?')
+        # The document is set to be non-ready again, until all the
+        # proper initialization (asking the password key and
+        # verifying the access permission, so on) is finished.
+        self.ready = False
+        return
+
+ # set_root(root)
+ # Set the Root dictionary of the document.
+ # Each PDF file must have exactly one /Root dictionary.
+    def set_root(self, root):
+        # Record the /Root (catalog) dictionary; in STRICT mode verify its
+        # /Type is /Catalog.
+        self.root = root
+        self.catalog = dict_value(self.root)
+        if self.catalog.get('Type') is not LITERAL_CATALOG:
+            if STRICT:
+                raise PDFSyntaxError('Catalog not found!')
+        return
+    # initialize(password='')
+    # Perform the initialization with a given password.
+    # This step is mandatory even if there's no password associated
+    # with the document.
+    def initialize(self, password=''):
+        # Dispatch on the security handler named in /Filter; with no
+        # /Encrypt dict the document is immediately ready and unrestricted.
+        if not self.encryption:
+            self.is_printable = self.is_modifiable = self.is_extractable = True
+            self.ready = True
+            return
+        (docid, param) = self.encryption
+        type = literal_name(param['Filter'])
+        if type == 'Adobe.APS':
+            return self.initialize_adobe_ps(password, docid, param)
+        if type == 'Standard':
+            return self.initialize_standard(password, docid, param)
+        if type == 'EBX_HANDLER':
+            # Adobe ADEPT DRM.
+            return self.initialize_ebx(password, docid, param)
+        raise PDFEncryptionError('Unknown filter: param=%r' % param)
+
+    def initialize_adobe_ps(self, password, docid, param):
+        # Adobe.APS handler (Onleihe / Bibliothek Digital lending DRM):
+        # derive the AES file key from a known principal key.
+        # NOTE(review): `global KEYFILEPATH` appears unused here -- confirm.
+        global KEYFILEPATH
+        self.decrypt_key = self.genkey_adobe_ps(param)
+        self.genkey = self.genkey_v4
+        self.decipher = self.decrypt_aes
+        self.ready = True
+        return
+
+    def genkey_adobe_ps(self, param):
+        # nice little offline principal keys dictionary
+        # global static principal key for German Onleihe / Bibliothek Digital
+        principalkeys = { 'bibliothek-digital.de': 'rRwGv2tbpKov1krvv7PO0ws9S436/lArPlfipz5Pqhw='.decode('base64')}
+        self.is_printable = self.is_modifiable = self.is_extractable = True
+        length = int_value(param.get('Length', 0)) / 8
+        edcdata = str_value(param.get('EDCData')).decode('base64')
+        pdrllic = str_value(param.get('PDRLLic')).decode('base64')
+        pdrlpol = str_value(param.get('PDRLPol')).decode('base64')
+        edclist = []
+        for pair in edcdata.split('\n'):
+            edclist.append(pair)
+        # principal key request
+        for key in principalkeys:
+            if key in pdrllic:
+                principalkey = principalkeys[key]
+            else:
+                raise ADEPTError('Cannot find principal key for this pdf')
+        shakey = SHA256(principalkey)
+        ivector = 16 * chr(0)
+        plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64'))
+        # Expect full-block PKCS#5 padding (16 bytes of 0x10) on the key blob.
+        if plaintext[-16:] != 16 * chr(16):
+            raise ADEPTError('Offlinekey cannot be decrypted, aborting ...')
+        pdrlpol = AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol)
+        if ord(pdrlpol[-1]) < 1 or ord(pdrlpol[-1]) > 16:
+            raise ADEPTError('Could not decrypt PDRLPol, aborting ...')
+        else:
+            # Strip PKCS#5 padding from the policy blob.
+            cutter = -1 * ord(pdrlpol[-1])
+            pdrlpol = pdrlpol[:cutter]
+        return plaintext[:16]
+
+ PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' \
+ '\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz'
+ # experimental aes pw support
+ def initialize_standard(self, password, docid, param):
+ # copy from a global variable
+ V = int_value(param.get('V', 0))
+ if (V <=0 or V > 4):
+ raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
+ length = int_value(param.get('Length', 40)) # Key length (bits)
+ O = str_value(param['O'])
+ R = int_value(param['R']) # Revision
+ if 5 <= R:
+ raise PDFEncryptionError('Unknown revision: %r' % R)
+ U = str_value(param['U'])
+ P = int_value(param['P'])
+ try:
+ EncMetadata = str_value(param['EncryptMetadata'])
+ except:
+ EncMetadata = 'True'
+ self.is_printable = bool(P & 4)
+ self.is_modifiable = bool(P & 8)
+ self.is_extractable = bool(P & 16)
+ self.is_annotationable = bool(P & 32)
+ self.is_formsenabled = bool(P & 256)
+ self.is_textextractable = bool(P & 512)
+ self.is_assemblable = bool(P & 1024)
+ self.is_formprintable = bool(P & 2048)
+ # Algorithm 3.2
+ password = (password+self.PASSWORD_PADDING)[:32] # 1
+ hash = hashlib.md5(password) # 2
+ hash.update(O) # 3
+ hash.update(struct.pack('<l', P)) # 4
+ hash.update(docid[0]) # 5
+ # aes special handling if metadata isn't encrypted
+ if EncMetadata == ('False' or 'false'):
+ hash.update('ffffffff'.decode('hex'))
+ if 5 <= R:
+ # 8
+ for _ in xrange(50):
+ hash = hashlib.md5(hash.digest()[:length/8])
+ key = hash.digest()[:length/8]
+ if R == 2:
+ # Algorithm 3.4
+ u1 = ARC4.new(key).decrypt(password)
+ elif R >= 3:
+ # Algorithm 3.5
+ hash = hashlib.md5(self.PASSWORD_PADDING) # 2
+ hash.update(docid[0]) # 3
+ x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4
+ for i in xrange(1,19+1):
+ k = ''.join( chr(ord(c) ^ i) for c in key )
+ x = ARC4.new(k).decrypt(x)
+ u1 = x+x # 32bytes total
+ if R == 2:
+ is_authenticated = (u1 == U)
+ else:
+ is_authenticated = (u1[:16] == U[:16])
+ if not is_authenticated:
+ raise ADEPTError('Password is not correct.')
+ self.decrypt_key = key
+ # genkey method
+ if V == 1 or V == 2:
+ self.genkey = self.genkey_v2
+ elif V == 3:
+ self.genkey = self.genkey_v3
+ elif V == 4:
+ self.genkey = self.genkey_v2
+ #self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2
+ # rc4
+ if V != 4:
+ self.decipher = self.decipher_rc4 # XXX may be AES
+ # aes
+ elif V == 4 and Length == 128:
+ elf.decipher = self.decipher_aes
+ elif V == 4 and Length == 256:
+ raise PDFNotImplementedError('AES256 encryption is currently unsupported')
+ self.ready = True
+ return
+
+    def initialize_ebx(self, password, docid, param):
+        # Adobe ADEPT (EBX_HANDLER): RSA-decrypt the per-book session key
+        # from the embedded license, then decrypt objects with RC4.
+        # `password` here is actually the user's RSA private key (DER).
+        self.is_printable = self.is_modifiable = self.is_extractable = True
+        rsa = RSA(password)
+        length = int_value(param.get('Length', 0)) / 8
+        rights = str_value(param.get('ADEPT_LICENSE')).decode('base64')
+        rights = zlib.decompress(rights, -15)
+        rights = etree.fromstring(rights)
+        expr = './/{http://ns.adobe.com/adept}encryptedKey'
+        bookkey = ''.join(rights.findtext(expr)).decode('base64')
+        bookkey = rsa.decrypt(bookkey)
+        # PKCS#1 v1.5 block type 2: 0x02, random padding, 0x00, payload.
+        if bookkey[0] != '\x02':
+            raise ADEPTError('error decrypting book session key')
+        index = bookkey.index('\0') + 1
+        bookkey = bookkey[index:]
+        ebx_V = int_value(param.get('V', 4))
+        ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6))
+        # added because of improper booktype / decryption book session key errors
+        if length > 0:
+            if len(bookkey) == length:
+                if ebx_V == 3:
+                    V = 3
+                else:
+                    V = 2
+            elif len(bookkey) == length + 1:
+                # Key is prefixed with a one-byte version marker.
+                V = ord(bookkey[0])
+                bookkey = bookkey[1:]
+            else:
+                print "ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type)
+                print "length is %d and len(bookkey) is %d" % (length, len(bookkey))
+                print "bookkey[0] is %d" % ord(bookkey[0])
+                raise ADEPTError('error decrypting book session key - mismatched length')
+        else:
+            # proper length unknown try with whatever you have
+            print "ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type)
+            print "length is %d and len(bookkey) is %d" % (length, len(bookkey))
+            print "bookkey[0] is %d" % ord(bookkey[0])
+            if ebx_V == 3:
+                V = 3
+            else:
+                V = 2
+        self.decrypt_key = bookkey
+        self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2
+        self.decipher = self.decrypt_rc4
+        self.ready = True
+        return
+
+    # genkey functions
+    # Each derives the per-object key from decrypt_key + objid/genno
+    # (PDF spec Algorithm 3.1); variants differ in salting.
+    def genkey_v2(self, objid, genno):
+        objid = struct.pack('<L', objid)[:3]
+        genno = struct.pack('<L', genno)[:2]
+        key = self.decrypt_key + objid + genno
+        hash = hashlib.md5(key)
+        key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
+        return key
+
+    def genkey_v3(self, objid, genno):
+        # V3 (ADEPT variant): objid/genno are XOR-obfuscated and the key
+        # is salted with 'sAlT'.
+        objid = struct.pack('<L', objid ^ 0x3569ac)
+        genno = struct.pack('<L', genno ^ 0xca96)
+        key = self.decrypt_key
+        key += objid[0] + genno[0] + objid[1] + genno[1] + objid[2] + 'sAlT'
+        hash = hashlib.md5(key)
+        key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
+        return key
+
+    # aes v2 and v4 algorithm
+    def genkey_v4(self, objid, genno):
+        objid = struct.pack('<L', objid)[:3]
+        genno = struct.pack('<L', genno)[:2]
+        key = self.decrypt_key + objid + genno + 'sAlT'
+        hash = hashlib.md5(key)
+        key = hash.digest()[:min(len(self.decrypt_key) + 5, 16)]
+        return key
+
+    def decrypt_aes(self, objid, genno, data):
+        # AES-CBC: IV is the first 16 bytes of the ciphertext.
+        key = self.genkey(objid, genno)
+        ivector = data[:16]
+        data = data[16:]
+        plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data)
+        # remove pkcs#5 aes padding
+        cutter = -1 * ord(plaintext[-1])
+        #print cutter
+        plaintext = plaintext[:cutter]
+        return plaintext
+
+    # NOTE(review): byte-for-byte duplicate of decrypt_aes; kept as a
+    # separate name pending real AES-256 key handling.
+    def decrypt_aes256(self, objid, genno, data):
+        key = self.genkey(objid, genno)
+        ivector = data[:16]
+        data = data[16:]
+        plaintext = AES.new(key,AES.MODE_CBC,ivector).decrypt(data)
+        # remove pkcs#5 aes padding
+        cutter = -1 * ord(plaintext[-1])
+        #print cutter
+        plaintext = plaintext[:cutter]
+        return plaintext
+
+    def decrypt_rc4(self, objid, genno, data):
+        key = self.genkey(objid, genno)
+        return ARC4.new(key).decrypt(data)
+
+
+    KEYWORD_OBJ = PSKeywordTable.intern('obj')
+
+    def getobj(self, objid):
+        # Fetch (and cache) object objid: consult the xref tables, then
+        # either parse it from the file at its offset or pull it out of
+        # the containing object stream (compressed xref entries).
+        if not self.ready:
+            raise PDFException('PDFDocument not initialized')
+        #assert self.xrefs
+        if objid in self.objs:
+            genno = 0
+            obj = self.objs[objid]
        else:
-            # named entity
+            for xref in self.xrefs:
+                try:
+                    (stmid, index) = xref.getpos(objid)
+                    break
+                except KeyError:
+                    pass
+            else:
+                #if STRICT:
+                # raise PDFSyntaxError('Cannot locate objid=%r' % objid)
+                return None
+            if stmid:
+                if gen_xref_stm:
+                    # When emitting xref streams, keep the indirection.
+                    return PDFObjStmRef(objid, stmid, index)
+                # Stuff from pdfminer: extract objects from object stream
+                stream = stream_value(self.getobj(stmid))
+                if stream.dic.get('Type') is not LITERAL_OBJSTM:
+                    if STRICT:
+                        raise PDFSyntaxError('Not a stream object: %r' % stream)
+                try:
+                    n = stream.dic['N']
+                except KeyError:
+                    if STRICT:
+                        raise PDFSyntaxError('N is not defined: %r' % stream)
+                    n = 0
+
+                if stmid in self.parsed_objs:
+                    objs = self.parsed_objs[stmid]
+                else:
+                    parser = PDFObjStrmParser(stream.get_data(), self)
+                    objs = []
+                    try:
+                        while 1:
+                            (_,obj) = parser.nextobject()
+                            objs.append(obj)
+                    except PSEOF:
+                        pass
+                    self.parsed_objs[stmid] = objs
+                genno = 0
+                # First 2*N tokens are the (objid, offset) header pairs;
+                # the actual objects follow.
+                i = n*2+index
+                try:
+                    obj = objs[i]
+                except IndexError:
+                    raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
+                if isinstance(obj, PDFStream):
+                    obj.set_objid(objid, 0)
+            else:
+                # Ordinary object: seek to its offset and parse "N G obj".
+                self.parser.seek(index)
+                (_,objid1) = self.parser.nexttoken() # objid
+                (_,genno) = self.parser.nexttoken() # genno
+                #assert objid1 == objid, (objid, objid1)
+                (_,kwd) = self.parser.nexttoken()
+                # #### hack around malformed pdf files
+                # assert objid1 == objid, (objid, objid1)
+##                if objid1 != objid:
+##                    x = []
+##                    while kwd is not self.KEYWORD_OBJ:
+##                        (_,kwd) = self.parser.nexttoken()
+##                        x.append(kwd)
+##                    if x:
+##                        objid1 = x[-2]
+##                        genno = x[-1]
+##
+                if kwd is not self.KEYWORD_OBJ:
+                    raise PDFSyntaxError(
+                        'Invalid object spec: offset=%r' % index)
+                (_,obj) = self.parser.nextobject()
+                if isinstance(obj, PDFStream):
+                    obj.set_objid(objid, genno)
+        if self.decipher:
+            obj = decipher_all(self.decipher, objid, genno, obj)
+        self.objs[objid] = obj
+        return obj
+
+
+class PDFObjStmRef(object):
+    # Placeholder for an object stored inside an object stream; tracked so
+    # the serializer can emit type-2 xref-stream entries. maxindex records
+    # the widest index seen, for sizing the xref stream's W fields.
+    maxindex = 0
+    def __init__(self, objid, stmid, index):
+        self.objid = objid
+        self.stmid = stmid
+        self.index = index
+        if index > PDFObjStmRef.maxindex:
+            PDFObjStmRef.maxindex = index
+
+
+## PDFParser
+##
+class PDFParser(PSStackParser):
+
+    def __init__(self, doc, fp):
+        # Bind parser and document to each other; set_parser triggers the
+        # initial xref scan.
+        PSStackParser.__init__(self, fp)
+        self.doc = doc
+        self.doc.set_parser(self)
+        return
+
+    def __repr__(self):
+        return '<PDFParser>'
+
+ KEYWORD_R = PSKeywordTable.intern('R')
+ KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj')
+ KEYWORD_STREAM = PSKeywordTable.intern('stream')
+ KEYWORD_XREF = PSKeywordTable.intern('xref')
+ KEYWORD_STARTXREF = PSKeywordTable.intern('startxref')
+ def do_keyword(self, pos, token):
+ if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF):
+ self.add_results(*self.pop(1))
+ return
+ if token is self.KEYWORD_ENDOBJ:
+ self.add_results(*self.pop(4))
+ return
+
+ if token is self.KEYWORD_R:
+ # reference to indirect object
try:
- text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
- except KeyError:
+ ((_,objid), (_,genno)) = self.pop(2)
+ (objid, genno) = (int(objid), int(genno))
+ obj = PDFObjRef(self.doc, objid, genno)
+ self.push((pos, obj))
+ except PSSyntaxError:
pass
- return text # leave as is
- return re.sub(u"&#?\w+;", fixup, text)
+ return
-def GetDecryptedBook(infile, kDatabases, serials, pids, starttime = time.time()):
- # handle the obvious cases at the beginning
- if not os.path.isfile(infile):
- raise DrmException(u"Input file does not exist.")
+ if token is self.KEYWORD_STREAM:
+ # stream object
+ ((_,dic),) = self.pop(1)
+ dic = dict_value(dic)
+ try:
+ objlen = int_value(dic['Length'])
+ except KeyError:
+ if STRICT:
+ raise PDFSyntaxError('/Length is undefined: %r' % dic)
+ objlen = 0
+ self.seek(pos)
+ try:
+ (_, line) = self.nextline() # 'stream'
+ except PSEOF:
+ if STRICT:
+ raise PDFSyntaxError('Unexpected EOF')
+ return
+ pos += len(line)
+ self.fp.seek(pos)
+ data = self.fp.read(objlen)
+ self.seek(pos+objlen)
+ while 1:
+ try:
+ (linepos, line) = self.nextline()
+ except PSEOF:
+ if STRICT:
+ raise PDFSyntaxError('Unexpected EOF')
+ break
+ if 'endstream' in line:
+ i = line.index('endstream')
+ objlen += i
+ data += line[:i]
+ break
+ objlen += len(line)
+ data += line
+ self.seek(pos+objlen)
+ obj = PDFStream(dic, data, self.doc.decipher)
+ self.push((pos, obj))
+ return
- mobi = True
- magic3 = open(infile,'rb').read(3)
- if magic3 == 'TPZ':
- mobi = False
+ # others
+ self.push((pos, token))
+ return
- if mobi:
- mb = mobidedrm.MobiBook(infile)
- else:
- mb = topazextract.TopazBook(infile)
+    def find_xref(self):
+        # search the last xref table by scanning the file backwards.
+        # The line *after* 'startxref' (reading forward) holds the offset;
+        # scanning in reverse, that is the last non-blank line seen.
+        prev = None
+        for line in self.revreadlines():
+            line = line.strip()
+            if line == 'startxref': break
+            if line:
+                prev = line
+        else:
+            raise PDFNoValidXRef('Unexpected EOF')
+        return int(prev)
- bookname = unescape(mb.getBookTitle())
- print u"Decrypting {1} ebook: {0}".format(bookname, mb.getBookType())
+    # read xref table
+    def read_xref_from(self, start, xrefs):
+        # Parse one xref section at file offset `start` (classic table or
+        # PDF-1.5 xref stream), then recurse into /XRefStm and /Prev.
+        self.seek(start)
+        self.reset()
+        try:
+            (pos, token) = self.nexttoken()
+        except PSEOF:
+            raise PDFNoValidXRef('Unexpected EOF')
+        if isinstance(token, int):
+            # XRefStream: PDF-1.5
+            if GEN_XREF_STM == 1:
+                global gen_xref_stm
+                gen_xref_stm = True
+            self.seek(pos)
+            self.reset()
+            xref = PDFXRefStream()
+            xref.load(self)
+        else:
+            if token is not self.KEYWORD_XREF:
+                raise PDFNoValidXRef('xref not found: pos=%d, token=%r' %
+                                     (pos, token))
+            self.nextline()
+            xref = PDFXRef()
+            xref.load(self)
+        xrefs.append(xref)
+        trailer = xref.trailer
+        if 'XRefStm' in trailer:
+            pos = int_value(trailer['XRefStm'])
+            self.read_xref_from(pos, xrefs)
+        if 'Prev' in trailer:
+            # find previous xref
+            pos = int_value(trailer['Prev'])
+            self.read_xref_from(pos, xrefs)
+        return
- # copy list of pids
- totalpids = list(pids)
- # extend PID list with book-specific PIDs
- md1, md2 = mb.getPIDMetaInfo()
- totalpids.extend(kgenpids.getPidList(md1, md2, serials, kDatabases))
- print u"Found {1:d} keys to try after {0:.1f} seconds".format(time.time()-starttime, len(totalpids))
+    # read xref tables and trailers
+    def read_xref(self):
+        # Normal path: follow startxref. Fallback for broken files: scan
+        # the whole file for "N G obj" headers and synthesize an xref.
+        xrefs = []
+        trailerpos = None
+        try:
+            pos = self.find_xref()
+            self.read_xref_from(pos, xrefs)
+        except PDFNoValidXRef:
+            # fallback
+            self.seek(0)
+            pat = re.compile(r'^(\d+)\s+(\d+)\s+obj\b')
+            offsets = {}
+            xref = PDFXRef()
+            while 1:
+                try:
+                    (pos, line) = self.nextline()
+                except PSEOF:
+                    break
+                if line.startswith('trailer'):
+                    trailerpos = pos # remember last trailer
+                m = pat.match(line)
+                if not m: continue
+                (objid, genno) = m.groups()
+                offsets[int(objid)] = (0, pos)
+            if not offsets: raise
+            xref.offsets = offsets
+            if trailerpos:
+                self.seek(trailerpos)
+                xref.load_trailer(self)
+                xrefs.append(xref)
+        return xrefs
- try:
- mb.processBook(totalpids)
- except:
- mb.cleanup
- raise
+## PDFObjStrmParser
+##
+class PDFObjStrmParser(PDFParser):
- print u"Decryption succeeded after {0:.1f} seconds".format(time.time()-starttime)
- return mb
+ def __init__(self, data, doc):
+ PSStackParser.__init__(self, StringIO(data))
+ self.doc = doc
+ return
+ def flush(self):
+ self.add_results(*self.popall())
+ return
-# kDatabaseFiles is a list of files created by kindlekey
-def decryptBook(infile, outdir, kDatabaseFiles, serials, pids):
- starttime = time.time()
- kDatabases = []
- for dbfile in kDatabaseFiles:
- kindleDatabase = {}
- try:
- with open(dbfile, 'r') as keyfilein:
- kindleDatabase = json.loads(keyfilein.read())
- kDatabases.append([dbfile,kindleDatabase])
- except Exception, e:
- print u"Error getting database from file {0:s}: {1:s}".format(dbfile,e)
- traceback.print_exc()
+    KEYWORD_R = KWD('R')
+    def do_keyword(self, pos, token):
+        # Inside an object stream only 'R' (indirect reference) needs
+        # special handling; everything else is pushed through verbatim.
+        if token is self.KEYWORD_R:
+            # reference to indirect object
+            try:
+                ((_,objid), (_,genno)) = self.pop(2)
+                (objid, genno) = (int(objid), int(genno))
+                obj = PDFObjRef(self.doc, objid, genno)
+                self.push((pos, obj))
+            except PSSyntaxError:
+                pass
+            return
+        # others
+        self.push((pos, token))
+        return
+###
+### My own code, for which there is none else to blame
+class PDFSerializer(object):
+    def __init__(self, inf, userkey):
+        # Parse and decrypt the whole document up front, collecting every
+        # object id and a cleaned trailer (Prev/XRefStm dropped, /Encrypt
+        # removed along with its object) for re-serialization by dump().
+        global GEN_XREF_STM, gen_xref_stm
+        gen_xref_stm = GEN_XREF_STM > 1
+        self.version = inf.read(8)
+        inf.seek(0)
+        self.doc = doc = PDFDocument()
+        parser = PDFParser(doc, inf)
+        doc.initialize(userkey)
+        self.objids = objids = set()
+        for xref in reversed(doc.xrefs):
+            trailer = xref.trailer
+            for objid in xref.objids():
+                objids.add(objid)
+        trailer = dict(trailer)
+        trailer.pop('Prev', None)
+        trailer.pop('XRefStm', None)
+        if 'Encrypt' in trailer:
+            objids.remove(trailer.pop('Encrypt').objid)
+        self.trailer = trailer
- try:
- book = GetDecryptedBook(infile, kDatabases, serials, pids, starttime)
- except Exception, e:
- print u"Error decrypting book after {1:.1f} seconds: {0}".format(e.args[0],time.time()-starttime)
- traceback.print_exc()
- return 1
+ def dump(self, outf):
+ self.outf = outf
+ self.write(self.version)
+ self.write('\n%\xe2\xe3\xcf\xd3\n')
+ doc = self.doc
+ objids = self.objids
+ xrefs = {}
+ maxobj = max(objids)
+ trailer = dict(self.trailer)
+ trailer['Size'] = maxobj + 1
+ for objid in objids:
+ obj = doc.getobj(objid)
+ if isinstance(obj, PDFObjStmRef):
+ xrefs[objid] = obj
+ continue
+ if obj is not None:
+ try:
+ genno = obj.genno
+ except AttributeError:
+ genno = 0
+ xrefs[objid] = (self.tell(), genno)
+ self.serialize_indirect(objid, obj)
+ startxref = self.tell()
- # if we're saving to the same folder as the original, use file name_
- # if to a different folder, use book name
- if os.path.normcase(os.path.normpath(outdir)) == os.path.normcase(os.path.normpath(os.path.dirname(infile))):
- outfilename = os.path.splitext(os.path.basename(infile))[0]
- else:
- outfilename = cleanup_name(book.getBookTitle())
+ if not gen_xref_stm:
+ self.write('xref\n')
+ self.write('0 %d\n' % (maxobj + 1,))
+ for objid in xrange(0, maxobj + 1):
+ if objid in xrefs:
+ # force the genno to be 0
+ self.write("%010d 00000 n \n" % xrefs[objid][0])
+ else:
+ self.write("%010d %05d f \n" % (0, 65535))
- # avoid excessively long file names
- if len(outfilename)>150:
- outfilename = outfilename[:150]
+ self.write('trailer\n')
+ self.serialize_object(trailer)
+ self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
- outfilename = outfilename+u"_nodrm"
- outfile = os.path.join(outdir, outfilename + book.getBookExtension())
+ else: # Generate crossref stream.
- book.getFile(outfile)
- print u"Saved decrypted book {1:s} after {0:.1f} seconds".format(time.time()-starttime, outfilename)
+ # Calculate size of entries
+ maxoffset = max(startxref, maxobj)
+ maxindex = PDFObjStmRef.maxindex
+ fl2 = 2
+ power = 65536
+ while maxoffset >= power:
+ fl2 += 1
+ power *= 256
+ fl3 = 1
+ power = 256
+ while maxindex >= power:
+ fl3 += 1
+ power *= 256
- if book.getBookType()==u"Topaz":
- zipname = os.path.join(outdir, outfilename + u"_SVG.zip")
- book.getSVGZip(zipname)
- print u"Saved SVG ZIP Archive for {1:s} after {0:.1f} seconds".format(time.time()-starttime, outfilename)
+ index = []
+ first = None
+ prev = None
+ data = []
+ # Put the xrefstream's reference in itself
+ startxref = self.tell()
+ maxobj += 1
+ xrefs[maxobj] = (startxref, 0)
+ for objid in sorted(xrefs):
+ if first is None:
+ first = objid
+ elif objid != prev + 1:
+ index.extend((first, prev - first + 1))
+ first = objid
+ prev = objid
+ objref = xrefs[objid]
+ if isinstance(objref, PDFObjStmRef):
+ f1 = 2
+ f2 = objref.stmid
+ f3 = objref.index
+ else:
+ f1 = 1
+ f2 = objref[0]
+ # we force all generation numbers to be 0
+ # f3 = objref[1]
+ f3 = 0
- # remove internal temporary directory of Topaz pieces
- book.cleanup()
- return 0
+ data.append(struct.pack('>B', f1))
+ data.append(struct.pack('>L', f2)[-fl2:])
+ data.append(struct.pack('>L', f3)[-fl3:])
+ index.extend((first, prev - first + 1))
+ data = zlib.compress(''.join(data))
+ dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
+ 'W': [1, fl2, fl3], 'Length': len(data),
+ 'Filter': LITERALS_FLATE_DECODE[0],
+ 'Root': trailer['Root'],}
+ if 'Info' in trailer:
+ dic['Info'] = trailer['Info']
+ xrefstm = PDFStream(dic, data)
+ self.serialize_indirect(maxobj, xrefstm)
+ self.write('startxref\n%d\n%%%%EOF' % startxref)
+    def write(self, data):
+        # Track the last byte written so serialize_object can decide when
+        # a separating space is needed between tokens.
+        self.outf.write(data)
+        self.last = data[-1:]
+    def tell(self):
+        return self.outf.tell()
+
+    def escape_string(self, string):
+        # Escape a byte string for emission as a PDF literal string "(...)".
+        string = string.replace('\\', '\\\\')
+        string = string.replace('\n', r'\n')
+        string = string.replace('(', r'\(')
+        string = string.replace(')', r'\)')
+        # get rid of ciando id
+        regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}')
+        if regularexp.match(string): return ('http://www.ciando.com')
+        return string
+
+    def serialize_object(self, obj):
+        # Emit one object in PDF syntax; dispatches on the Python type and
+        # recurses for containers.
+        if isinstance(obj, dict):
+            # Correct malformed Mac OS resource forks for Stanza
+            if 'ResFork' in obj and 'Type' in obj and 'Subtype' not in obj \
+                   and isinstance(obj['Type'], int):
+                obj['Subtype'] = obj['Type']
+                del obj['Type']
+            # end - hope this doesn't have bad effects
+            self.write('<<')
+            for key, val in obj.items():
+                self.write('/%s' % key)
+                self.serialize_object(val)
+            self.write('>>')
+        elif isinstance(obj, list):
+            self.write('[')
+            for val in obj:
+                self.serialize_object(val)
+            self.write(']')
+        elif isinstance(obj, str):
+            self.write('(%s)' % self.escape_string(obj))
+        elif isinstance(obj, bool):
+            # Insert a separator when the previous token ended in an
+            # alphanumeric character.
+            if self.last.isalnum():
+                self.write(' ')
+            self.write(str(obj).lower())
+        elif isinstance(obj, (int, long, float)):
+            if self.last.isalnum():
+                self.write(' ')
+            self.write(str(obj))
+        elif isinstance(obj, PDFObjRef):
+            if self.last.isalnum():
+                self.write(' ')
+            # All generation numbers are forced to 0 in the output.
+            self.write('%d %d R' % (obj.objid, 0))
+        elif isinstance(obj, PDFStream):
+            ### If we don't generate cross ref streams the object streams
+            ### are no longer useful, as we have extracted all objects from
+            ### them. Therefore leave them out from the output.
+            if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm:
+                self.write('(deleted)')
+            else:
+                data = obj.get_decdata()
+                self.serialize_object(obj.dic)
+                self.write('stream\n')
+                self.write(data)
+                self.write('\nendstream')
+        else:
+            # PSLiterals/keywords and anything else: str() representation.
+            data = str(obj)
+            if data[0].isalnum() and self.last.isalnum():
+                self.write(' ')
+            self.write(data)
+
+    def serialize_indirect(self, objid, obj):
+        # Emit "objid 0 obj ... endobj" (generation number always 0).
+        self.write('%d 0 obj' % (objid,))
+        self.serialize_object(obj)
+        if self.last.isalnum():
+            self.write('\n')
+        self.write('endobj\n')
+
+
+
+
+def decryptBook(userkey, inpath, outpath):
+    # Decrypt the ADEPT-encrypted PDF at inpath using the RSA user key
+    # (DER bytes) and write the result to outpath.
+    # Returns 0 on success, 2 on failure (wrong key or write error).
+    if RSA is None:
+        raise ADEPTError(u"PyCrypto or OpenSSL must be installed.")
+    with open(inpath, 'rb') as inf:
+        try:
+            serializer = PDFSerializer(inf, userkey)
+        except:
+            print u"Error serializing pdf {0}. Probably wrong key.".format(os.path.basename(inpath))
+            return 2
+        # hope this will fix the 'bad file descriptor' problem
+        with open(outpath, 'wb') as outf:
+            # help construct to make sure the method runs to the end
+            try:
+                serializer.dump(outf)
+            except Exception, e:
+                print u"error writing pdf: {0}".format(e.args[0])
+                return 2
+    return 0
-def usage(progname):
- print u"Removes DRM protection from Mobipocket, Amazon KF8, Amazon Print Replica and Amazon Topaz ebooks"
- print u"Usage:"
- print u" {0} [-k <kindle.k4i>] [-p <comma separated PIDs>] [-s <comma separated Kindle serial numbers>] <infile> <outdir>".format(progname)
-#
-# Main
-#
def cli_main():
+ sys.stdout=SafeUnbuffered(sys.stdout)
+ sys.stderr=SafeUnbuffered(sys.stderr)
argv=unicode_argv()
progname = os.path.basename(argv[0])
- print u"K4MobiDeDrm v{0}.\nCopyright © 2008-2013 The Dark Reverser et al.".format(__version__)
+ if len(argv) != 4:
+ print u"usage: {0} <keyfile.der> <inbook.pdf> <outbook.pdf>".format(progname)
+ return 1
+ keypath, inpath, outpath = argv[1:]
+ userkey = open(keypath,'rb').read()
+ result = decryptBook(userkey, inpath, outpath)
+ if result == 0:
+ print u"Successfully decrypted {0:s} as {1:s}".format(os.path.basename(inpath),os.path.basename(outpath))
+ return result
+
+def gui_main():
try:
- opts, args = getopt.getopt(argv[1:], "k:p:s:")
- except getopt.GetoptError, err:
- print u"Error in options or arguments: {0}".format(err.args[0])
- usage(progname)
- sys.exit(2)
- if len(args)<2:
- usage(progname)
- sys.exit(2)
-
- infile = args[0]
- outdir = args[1]
- kDatabaseFiles = []
- serials = []
- pids = []
-
- for o, a in opts:
- if o == "-k":
- if a == None :
- raise DrmException("Invalid parameter for -k")
- kDatabaseFiles.append(a)
- if o == "-p":
- if a == None :
- raise DrmException("Invalid parameter for -p")
- pids = a.split(',')
- if o == "-s":
- if a == None :
- raise DrmException("Invalid parameter for -s")
- serials = a.split(',')
-
- # try with built in Kindle Info files if not on Linux
- k4 = not sys.platform.startswith('linux')
-
- return decryptBook(infile, outdir, kDatabaseFiles, serials, pids)
+ import Tkinter
+ import Tkconstants
+ import tkMessageBox
+ import traceback
+ except:
+ return cli_main()
+
+ class DecryptionDialog(Tkinter.Frame):
+ def __init__(self, root):
+ Tkinter.Frame.__init__(self, root, border=5)
+ self.status = Tkinter.Label(self, text=u"Select files for decryption")
+ self.status.pack(fill=Tkconstants.X, expand=1)
+ body = Tkinter.Frame(self)
+ body.pack(fill=Tkconstants.X, expand=1)
+ sticky = Tkconstants.E + Tkconstants.W
+ body.grid_columnconfigure(1, weight=2)
+ Tkinter.Label(body, text=u"Key file").grid(row=0)
+ self.keypath = Tkinter.Entry(body, width=30)
+ self.keypath.grid(row=0, column=1, sticky=sticky)
+ if os.path.exists(u"adeptkey.der"):
+ self.keypath.insert(0, u"adeptkey.der")
+ button = Tkinter.Button(body, text=u"...", command=self.get_keypath)
+ button.grid(row=0, column=2)
+ Tkinter.Label(body, text=u"Input file").grid(row=1)
+ self.inpath = Tkinter.Entry(body, width=30)
+ self.inpath.grid(row=1, column=1, sticky=sticky)
+ button = Tkinter.Button(body, text=u"...", command=self.get_inpath)
+ button.grid(row=1, column=2)
+ Tkinter.Label(body, text=u"Output file").grid(row=2)
+ self.outpath = Tkinter.Entry(body, width=30)
+ self.outpath.grid(row=2, column=1, sticky=sticky)
+ button = Tkinter.Button(body, text=u"...", command=self.get_outpath)
+ button.grid(row=2, column=2)
+ buttons = Tkinter.Frame(self)
+ buttons.pack()
+ botton = Tkinter.Button(
+ buttons, text=u"Decrypt", width=10, command=self.decrypt)
+ botton.pack(side=Tkconstants.LEFT)
+ Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT)
+ button = Tkinter.Button(
+ buttons, text=u"Quit", width=10, command=self.quit)
+ button.pack(side=Tkconstants.RIGHT)
+
+ def get_keypath(self):
+ keypath = tkFileDialog.askopenfilename(
+ parent=None, title=u"Select Adobe Adept \'.der\' key file",
+ defaultextension=u".der",
+ filetypes=[('Adobe Adept DER-encoded files', '.der'),
+ ('All Files', '.*')])
+ if keypath:
+ keypath = os.path.normpath(keypath)
+ self.keypath.delete(0, Tkconstants.END)
+ self.keypath.insert(0, keypath)
+ return
+
+ def get_inpath(self):
+ inpath = tkFileDialog.askopenfilename(
+ parent=None, title=u"Select ADEPT-encrypted PDF file to decrypt",
+ defaultextension=u".pdf", filetypes=[('PDF files', '.pdf')])
+ if inpath:
+ inpath = os.path.normpath(inpath)
+ self.inpath.delete(0, Tkconstants.END)
+ self.inpath.insert(0, inpath)
+ return
+
+ def get_outpath(self):
+ outpath = tkFileDialog.asksaveasfilename(
+ parent=None, title=u"Select unencrypted PDF file to produce",
+ defaultextension=u".pdf", filetypes=[('PDF files', '.pdf')])
+ if outpath:
+ outpath = os.path.normpath(outpath)
+ self.outpath.delete(0, Tkconstants.END)
+ self.outpath.insert(0, outpath)
+ return
+
+ def decrypt(self):
+ keypath = self.keypath.get()
+ inpath = self.inpath.get()
+ outpath = self.outpath.get()
+ if not keypath or not os.path.exists(keypath):
+ self.status['text'] = u"Specified key file does not exist"
+ return
+ if not inpath or not os.path.exists(inpath):
+ self.status['text'] = u"Specified input file does not exist"
+ return
+ if not outpath:
+ self.status['text'] = u"Output file not specified"
+ return
+ if inpath == outpath:
+ self.status['text'] = u"Must have different input and output files"
+ return
+ userkey = open(keypath,'rb').read()
+ self.status['text'] = u"Decrypting..."
+ try:
+ decrypt_status = decryptBook(userkey, inpath, outpath)
+ except Exception, e:
+ self.status['text'] = u"Error; {0}".format(e.args[0])
+ return
+ if decrypt_status == 0:
+ self.status['text'] = u"File successfully decrypted"
+ else:
+ self.status['text'] = u"The was an error decrypting the file."
+
+
+ root = Tkinter.Tk()
+ if RSA is None:
+ root.withdraw()
+ tkMessageBox.showerror(
+ "INEPT PDF",
+ "This script requires OpenSSL or PyCrypto, which must be installed "
+ "separately. Read the top-of-script comment for details.")
+ return 1
+ root.title(u"Adobe Adept PDF Decrypter v.{0}".format(__version__))
+ root.resizable(True, False)
+ root.minsize(370, 0)
+ DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1)
+ root.mainloop()
+ return 0
if __name__ == '__main__':
- sys.stdout=SafeUnbuffered(sys.stdout)
- sys.stderr=SafeUnbuffered(sys.stderr)
- sys.exit(cli_main())
+ if len(sys.argv) > 1:
+ sys.exit(cli_main())
+ sys.exit(gui_main())
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/k4mobidedrm.py b/DeDRM_calibre_plugin/DeDRM_plugin/k4mobidedrm.py
index dd88797..504105b 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/k4mobidedrm.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/k4mobidedrm.py
@@ -2,266 +2,331 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
-import sys
-import os, csv
-import binascii
-import zlib
+
+# ignobleepub.pyw, version 3.6
+# Copyright © 2009-2012 by DiapDealer et al.
+
+# engine to remove drm from Kindle for Mac and Kindle for PC books
+# for personal use for archiving and converting your ebooks
+
+# PLEASE DO NOT PIRATE EBOOKS!
+
+# We want all authors and publishers, and eBook stores to live
+# long and prosperous lives but at the same time we just want to
+# be able to read OUR books on whatever device we want and to keep
+# readable for a long, long time
+
+# This borrows very heavily from works by CMBDTC, IHeartCabbages, skindle,
+# unswindle, DarkReverser, ApprenticeAlf, DiapDealer, some_updates
+# and many many others
+# Special thanks to The Dark Reverser for MobiDeDrm and CMBDTC for cmbdtc_dump
+# from which this script borrows most unashamedly.
+
+
+# Changelog
+# 1.0 - Name change to k4mobidedrm. Adds Mac support, Adds plugin code
+# 1.1 - Adds support for additional kindle.info files
+# 1.2 - Better error handling for older Mobipocket
+# 1.3 - Don't try to decrypt Topaz books
+# 1.7 - Add support for Topaz books and Kindle serial numbers. Split code.
+# 1.9 - Tidy up after Topaz, minor exception changes
+# 2.1 - Topaz fix and filename sanitizing
+# 2.2 - Topaz Fix and minor Mac code fix
+# 2.3 - More Topaz fixes
+# 2.4 - K4PC/Mac key generation fix
+# 2.6 - Better handling of non-K4PC/Mac ebooks
+# 2.7 - Better trailing bytes handling in mobidedrm
+# 2.8 - Moved parsing of kindle.info files to mac & pc util files.
+# 3.1 - Updated for new calibre interface. Now __init__ in plugin.
+# 3.5 - Now support Kindle for PC/Mac 1.6
+# 3.6 - Even better trailing bytes handling in mobidedrm
+# 3.7 - Add support for Amazon Print Replica ebooks.
+# 3.8 - Improved Topaz support
+# 4.1 - Improved Topaz support and faster decryption with alfcrypto
+# 4.2 - Added support for Amazon's KF8 format ebooks
+# 4.4 - Linux calls to Wine added, and improved configuration dialog
+# 4.5 - Linux works again without Wine. Some Mac key file search changes
+# 4.6 - First attempt to handle unicode properly
+# 4.7 - Added timing reports, and changed search for Mac key files
+# 4.8 - Much better unicode handling, matching the updated inept and ignoble scripts
+# - Moved back into plugin, __init__ in plugin now only contains plugin code.
+# 4.9 - Missed some invalid characters in cleanup_name
+# 5.0 - Extraction of info from Kindle for PC/Mac moved into kindlekey.py
+# - tweaked GetDecryptedBook interface to leave passed parameters unchanged
+# 5.1 - moved unicode_argv call inside main for Windows DeDRM compatibility
+# 5.2 - Fixed error in command line processing of unicode arguments
+
+__version__ = '5.2'
+
+
+import sys, os, re
+import csv
+import getopt
import re
-from struct import pack, unpack, unpack_from
import traceback
+import time
+import htmlentitydefs
+import json
class DrmException(Exception):
pass
-global charMap1
-global charMap3
-global charMap4
-
-
-charMap1 = 'n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M'
-charMap3 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
-charMap4 = 'ABCDEFGHIJKLMNPQRSTUVWXYZ123456789'
-
-# crypto digestroutines
-import hashlib
-
-def MD5(message):
- ctx = hashlib.md5()
- ctx.update(message)
- return ctx.digest()
-
-def SHA1(message):
- ctx = hashlib.sha1()
- ctx.update(message)
- return ctx.digest()
-
-
-# Encode the bytes in data with the characters in map
-def encode(data, map):
- result = ''
- for char in data:
- value = ord(char)
- Q = (value ^ 0x80) // len(map)
- R = value % len(map)
- result += map[Q]
- result += map[R]
- return result
-
-# Hash the bytes in data and then encode the digest with the characters in map
-def encodeHash(data,map):
- return encode(MD5(data),map)
-
-# Decode the string in data with the characters in map. Returns the decoded bytes
-def decode(data,map):
- result = ''
- for i in range (0,len(data)-1,2):
- high = map.find(data[i])
- low = map.find(data[i+1])
- if (high == -1) or (low == -1) :
- break
- value = (((high * len(map)) ^ 0x80) & 0xFF) + low
- result += pack('B',value)
- return result
-
-#
-# PID generation routines
-#
-
-# Returns two bit at offset from a bit field
-def getTwoBitsFromBitField(bitField,offset):
- byteNumber = offset // 4
- bitPosition = 6 - 2*(offset % 4)
- return ord(bitField[byteNumber]) >> bitPosition & 3
-
-# Returns the six bits at offset from a bit field
-def getSixBitsFromBitField(bitField,offset):
- offset *= 3
- value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2)
- return value
-
-# 8 bits to six bits encoding from hash to generate PID string
-def encodePID(hash):
- global charMap3
- PID = ''
- for position in range (0,8):
- PID += charMap3[getSixBitsFromBitField(hash,position)]
- return PID
-
-# Encryption table used to generate the device PID
-def generatePidEncryptionTable() :
- table = []
- for counter1 in range (0,0x100):
- value = counter1
- for counter2 in range (0,8):
- if (value & 1 == 0) :
- value = value >> 1
- else :
- value = value >> 1
- value = value ^ 0xEDB88320
- table.append(value)
- return table
-
-# Seed value used to generate the device PID
-def generatePidSeed(table,dsn) :
- value = 0
- for counter in range (0,4) :
- index = (ord(dsn[counter]) ^ value) &0xFF
- value = (value >> 8) ^ table[index]
- return value
-
-# Generate the device PID
-def generateDevicePID(table,dsn,nbRoll):
- global charMap4
- seed = generatePidSeed(table,dsn)
- pidAscii = ''
- pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]
- index = 0
- for counter in range (0,nbRoll):
- pid[index] = pid[index] ^ ord(dsn[counter])
- index = (index+1) %8
- for counter in range (0,8):
- index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
- pidAscii += charMap4[index]
- return pidAscii
-
-def crc32(s):
- return (~binascii.crc32(s,-1))&0xFFFFFFFF
-
-# convert from 8 digit PID to 10 digit PID with checksum
-def checksumPid(s):
- global charMap4
- crc = crc32(s)
- crc = crc ^ (crc >> 16)
- res = s
- l = len(charMap4)
- for i in (0,1):
- b = crc & 0xff
- pos = (b // l) ^ (b % l)
- res += charMap4[pos%l]
- crc >>= 8
- return res
-
-
-# old kindle serial number to fixed pid
-def pidFromSerial(s, l):
- global charMap4
- crc = crc32(s)
- arr1 = [0]*l
- for i in xrange(len(s)):
- arr1[i%l] ^= ord(s[i])
- crc_bytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff]
- for i in xrange(l):
- arr1[i] ^= crc_bytes[i&3]
- pid = ""
- for i in xrange(l):
- b = arr1[i] & 0xff
- pid+=charMap4[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))]
- return pid
-
-
-# Parse the EXTH header records and use the Kindle serial number to calculate the book pid.
-def getKindlePids(rec209, token, serialnum):
- pids=[]
-
- if isinstance(serialnum,unicode):
- serialnum = serialnum.encode('ascii')
-
- # Compute book PID
- pidHash = SHA1(serialnum+rec209+token)
- bookPID = encodePID(pidHash)
- bookPID = checksumPid(bookPID)
- pids.append(bookPID)
-
- # compute fixed pid for old pre 2.5 firmware update pid as well
- kindlePID = pidFromSerial(serialnum, 7) + "*"
- kindlePID = checksumPid(kindlePID)
- pids.append(kindlePID)
-
- return pids
-
-
-# parse the Kindleinfo file to calculate the book pid.
-
-keynames = ['kindle.account.tokens','kindle.cookie.item','eulaVersionAccepted','login_date','kindle.token.item','login','kindle.key.item','kindle.name.info','kindle.device.info', 'MazamaRandomNumber']
-
-def getK4Pids(rec209, token, kindleDatabase):
- global charMap1
- pids = []
+if 'calibre' in sys.modules:
+ inCalibre = True
+else:
+ inCalibre = False
+
+if inCalibre:
+ from calibre_plugins.dedrm import mobidedrm
+ from calibre_plugins.dedrm import topazextract
+ from calibre_plugins.dedrm import kgenpids
+ from calibre_plugins.dedrm import android
+else:
+ import mobidedrm
+ import topazextract
+ import kgenpids
+ import android
+
+# Wrap a stream so that output gets flushed immediately
+# and also make sure that any unicode strings get
+# encoded using "replace" before writing them.
+class SafeUnbuffered:
+ def __init__(self, stream):
+ self.stream = stream
+ self.encoding = stream.encoding
+ if self.encoding == None:
+ self.encoding = "utf-8"
+ def write(self, data):
+ if isinstance(data,unicode):
+ data = data.encode(self.encoding,"replace")
+ self.stream.write(data)
+ self.stream.flush()
+ def __getattr__(self, attr):
+ return getattr(self.stream, attr)
+
+iswindows = sys.platform.startswith('win')
+isosx = sys.platform.startswith('darwin')
+
+def unicode_argv():
+ if iswindows:
+ # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
+ # strings.
+
+ # Versions 2.x of Python don't support Unicode in sys.argv on
+ # Windows, with the underlying Windows API instead replacing multi-byte
+ # characters with '?'.
+
+
+ from ctypes import POINTER, byref, cdll, c_int, windll
+ from ctypes.wintypes import LPCWSTR, LPWSTR
+
+ GetCommandLineW = cdll.kernel32.GetCommandLineW
+ GetCommandLineW.argtypes = []
+ GetCommandLineW.restype = LPCWSTR
+
+ CommandLineToArgvW = windll.shell32.CommandLineToArgvW
+ CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
+ CommandLineToArgvW.restype = POINTER(LPWSTR)
+
+ cmd = GetCommandLineW()
+ argc = c_int(0)
+ argv = CommandLineToArgvW(cmd, byref(argc))
+ if argc.value > 0:
+ # Remove Python executable and commands if present
+ start = argc.value - len(sys.argv)
+ return [argv[i] for i in
+ xrange(start, argc.value)]
+ # if we don't have any arguments at all, just pass back script name
+ # this should never happen
+ return [u"mobidedrm.py"]
+ else:
+ argvencoding = sys.stdin.encoding
+ if argvencoding == None:
+ argvencoding = "utf-8"
+ return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
+
+# cleanup unicode filenames
+# borrowed from calibre from calibre/src/calibre/__init__.py
+# added in removal of control (<32) chars
+# and removal of . at start and end
+# and with some (heavily edited) code from Paul Durrant's kindlenamer.py
+def cleanup_name(name):
+ # substitute filename unfriendly characters
+ name = name.replace(u"<",u"[").replace(u">",u"]").replace(u" : ",u" – ").replace(u": ",u" – ").replace(u":",u"—").replace(u"/",u"_").replace(u"\\",u"_").replace(u"|",u"_").replace(u"\"",u"\'").replace(u"*",u"_").replace(u"?",u"")
+ # delete control characters
+ name = u"".join(char for char in name if ord(char)>=32)
+ # white space to single space, delete leading and trailing while space
+ name = re.sub(ur"\s", u" ", name).strip()
+ # remove leading dots
+ while len(name)>0 and name[0] == u".":
+ name = name[1:]
+ # remove trailing dots (Windows doesn't like them)
+ if name.endswith(u'.'):
+ name = name[:-1]
+ return name
+
+# must be passed unicode
+def unescape(text):
+ def fixup(m):
+ text = m.group(0)
+ if text[:2] == u"&#":
+ # character reference
+ try:
+ if text[:3] == u"&#x":
+ return unichr(int(text[3:-1], 16))
+ else:
+ return unichr(int(text[2:-1]))
+ except ValueError:
+ pass
+ else:
+ # named entity
+ try:
+ text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
+ except KeyError:
+ pass
+ return text # leave as is
+ return re.sub(u"&#?\w+;", fixup, text)
+
+def GetDecryptedBook(infile, kDatabases, serials, pids, starttime = time.time()):
+ # handle the obvious cases at the beginning
+ if not os.path.isfile(infile):
+ raise DrmException(u"Input file does not exist.")
+
+ mobi = True
+ magic3 = open(infile,'rb').read(3)
+ if magic3 == 'TPZ':
+ mobi = False
+
+ if mobi:
+ mb = mobidedrm.MobiBook(infile)
+ else:
+ mb = topazextract.TopazBook(infile)
+
+ bookname = unescape(mb.getBookTitle())
+ print u"Decrypting {1} ebook: {0}".format(bookname, mb.getBookType())
+
+ # copy list of pids
+ totalpids = list(pids)
+ # extend PID list with book-specific PIDs
+ md1, md2 = mb.getPIDMetaInfo()
+ totalpids.extend(kgenpids.getPidList(md1, md2, serials, kDatabases))
+ print u"Found {1:d} keys to try after {0:.1f} seconds".format(time.time()-starttime, len(totalpids))
try:
- # Get the Mazama Random number
- MazamaRandomNumber = (kindleDatabase[1])['MazamaRandomNumber'].decode('hex').encode('ascii')
+ mb.processBook(totalpids)
+ except:
+ mb.cleanup
+ raise
- # Get the kindle account token
- kindleAccountToken = (kindleDatabase[1])['kindle.account.tokens'].decode('hex').encode('ascii')
+ print u"Decryption succeeded after {0:.1f} seconds".format(time.time()-starttime)
+ return mb
- # Get the IDString used to decode the Kindle Info file
- IDString = (kindleDatabase[1])['IDString'].decode('hex').encode('ascii')
- # Get the UserName stored when the Kindle Info file was decoded
- UserName = (kindleDatabase[1])['UserName'].decode('hex').encode('ascii')
-
- except KeyError:
- print u"Keys not found in the database {0}.".format(kindleDatabase[0])
- return pids
+# kDatabaseFiles is a list of files created by kindlekey
+def decryptBook(infile, outdir, kDatabaseFiles, serials, pids):
+ starttime = time.time()
+ kDatabases = []
+ for dbfile in kDatabaseFiles:
+ kindleDatabase = {}
+ try:
+ with open(dbfile, 'r') as keyfilein:
+ kindleDatabase = json.loads(keyfilein.read())
+ kDatabases.append([dbfile,kindleDatabase])
+ except Exception, e:
+ print u"Error getting database from file {0:s}: {1:s}".format(dbfile,e)
+ traceback.print_exc()
- # Get the ID string used
- encodedIDString = encodeHash(IDString,charMap1)
- # Get the current user name
- encodedUsername = encodeHash(UserName,charMap1)
- # concat, hash and encode to calculate the DSN
- DSN = encode(SHA1(MazamaRandomNumber+encodedIDString+encodedUsername),charMap1)
+ try:
+ book = GetDecryptedBook(infile, kDatabases, serials, pids, starttime)
+ except Exception, e:
+ print u"Error decrypting book after {1:.1f} seconds: {0}".format(e.args[0],time.time()-starttime)
+ traceback.print_exc()
+ return 1
- # Compute the device PID (for which I can tell, is used for nothing).
- table = generatePidEncryptionTable()
- devicePID = generateDevicePID(table,DSN,4)
- devicePID = checksumPid(devicePID)
- pids.append(devicePID)
+ # if we're saving to the same folder as the original, use file name_
+ # if to a different folder, use book name
+ if os.path.normcase(os.path.normpath(outdir)) == os.path.normcase(os.path.normpath(os.path.dirname(infile))):
+ outfilename = os.path.splitext(os.path.basename(infile))[0]
+ else:
+ outfilename = cleanup_name(book.getBookTitle())
- # Compute book PIDs
+ # avoid excessively long file names
+ if len(outfilename)>150:
+ outfilename = outfilename[:150]
- # book pid
- pidHash = SHA1(DSN+kindleAccountToken+rec209+token)
- bookPID = encodePID(pidHash)
- bookPID = checksumPid(bookPID)
- pids.append(bookPID)
+ outfilename = outfilename+u"_nodrm"
+ outfile = os.path.join(outdir, outfilename + book.getBookExtension())
- # variant 1
- pidHash = SHA1(kindleAccountToken+rec209+token)
- bookPID = encodePID(pidHash)
- bookPID = checksumPid(bookPID)
- pids.append(bookPID)
+ book.getFile(outfile)
+ print u"Saved decrypted book {1:s} after {0:.1f} seconds".format(time.time()-starttime, outfilename)
- # variant 2
- pidHash = SHA1(DSN+rec209+token)
- bookPID = encodePID(pidHash)
- bookPID = checksumPid(bookPID)
- pids.append(bookPID)
+ if book.getBookType()==u"Topaz":
+ zipname = os.path.join(outdir, outfilename + u"_SVG.zip")
+ book.getSVGZip(zipname)
+ print u"Saved SVG ZIP Archive for {1:s} after {0:.1f} seconds".format(time.time()-starttime, outfilename)
- return pids
+ # remove internal temporary directory of Topaz pieces
+ book.cleanup()
+ return 0
-def getPidList(md1, md2, serials=[], kDatabases=[]):
- pidlst = []
- if kDatabases is None:
- kDatabases = []
- if serials is None:
- serials = []
+def usage(progname):
+ print u"Removes DRM protection from Mobipocket, Amazon KF8, Amazon Print Replica and Amazon Topaz ebooks"
+ print u"Usage:"
+ print u" {0} [-k <kindle.k4i>] [-p <comma separated PIDs>] [-s <comma separated Kindle serial numbers>] [ -a <AmazonSecureStorage.xml> ] <infile> <outdir>".format(progname)
- for kDatabase in kDatabases:
- try:
- pidlst.extend(getK4Pids(md1, md2, kDatabase))
- except Exception, e:
- print u"Error getting PIDs from database {0}: {1}".format(kDatabase[0],e.args[0])
- traceback.print_exc()
+#
+# Main
+#
+def cli_main():
+ argv=unicode_argv()
+ progname = os.path.basename(argv[0])
+ print u"K4MobiDeDrm v{0}.\nCopyright © 2008-2013 The Dark Reverser et al.".format(__version__)
- for serialnum in serials:
- try:
- pidlst.extend(getKindlePids(md1, md2, serialnum))
- except Exception, e:
- print u"Error getting PIDs from serial number {0}: {1}".format(serialnum ,e.args[0])
- traceback.print_exc()
+ try:
+ opts, args = getopt.getopt(argv[1:], "k:p:s:a:")
+ except getopt.GetoptError, err:
+ print u"Error in options or arguments: {0}".format(err.args[0])
+ usage(progname)
+ sys.exit(2)
+ if len(args)<2:
+ usage(progname)
+ sys.exit(2)
+
+ infile = args[0]
+ outdir = args[1]
+ kDatabaseFiles = []
+ serials = []
+ pids = []
- return pidlst
+ for o, a in opts:
+ if o == "-k":
+ if a == None :
+ raise DrmException("Invalid parameter for -k")
+ kDatabaseFiles.append(a)
+ if o == "-p":
+ if a == None :
+ raise DrmException("Invalid parameter for -p")
+ pids = a.split(',')
+ if o == "-s":
+ if a == None :
+ raise DrmException("Invalid parameter for -s")
+ serials = a.split(',')
+ if o == '-a':
+ if a == None:
+ continue
+ serials.extend(android.get_serials(a))
+ serials.extend(android.get_serials())
+
+ # try with built in Kindle Info files if not on Linux
+ k4 = not sys.platform.startswith('linux')
+
+ return decryptBook(infile, outdir, kDatabaseFiles, serials, pids)
+
+
+if __name__ == '__main__':
+ sys.stdout=SafeUnbuffered(sys.stdout)
+ sys.stderr=SafeUnbuffered(sys.stderr)
+ sys.exit(cli_main())
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/kgenpids.py b/DeDRM_calibre_plugin/DeDRM_plugin/kgenpids.py
index f58e973..dd88797 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/kgenpids.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/kgenpids.py
@@ -2,102 +2,25 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
-
-# kindlekey.py
-# Copyright © 2010-2013 by some_updates and Apprentice Alf
-#
-# Currently requires alfcrypto.py which requires the alfcrypto library
-
-# Revision history:
-# 1.0 - Kindle info file decryption, extracted from k4mobidedrm, etc.
-# 1.1 - Added Tkinter to match adobekey.py
-# 1.2 - Fixed testing of successful retrieval on Mac
-# 1.3 - Added getkey interface for Windows DeDRM application
-# Simplified some of the Kindle for Mac code.
-# 1.4 - Remove dependency on alfcrypto
-# 1.5 - moved unicode_argv call inside main for Windows DeDRM compatibility
-# 1.6 - Fixed a problem getting the disk serial numbers
-# 1.7 - Work if TkInter is missing
-# 1.8 - Fixes for Kindle for Mac, and non-ascii in Windows user names
-
-
-"""
-Retrieve Kindle for PC/Mac user key.
-"""
-
-__license__ = 'GPL v3'
-__version__ = '1.8'
-
-import sys, os, re
+import sys
+import os, csv
+import binascii
+import zlib
+import re
from struct import pack, unpack, unpack_from
-import json
-import getopt
-
-# Routines common to Mac and PC
+import traceback
-# Wrap a stream so that output gets flushed immediately
-# and also make sure that any unicode strings get
-# encoded using "replace" before writing them.
-class SafeUnbuffered:
- def __init__(self, stream):
- self.stream = stream
- self.encoding = stream.encoding
- if self.encoding == None:
- self.encoding = "utf-8"
- def write(self, data):
- if isinstance(data,unicode):
- data = data.encode(self.encoding,"replace")
- self.stream.write(data)
- self.stream.flush()
- def __getattr__(self, attr):
- return getattr(self.stream, attr)
-
-try:
- from calibre.constants import iswindows, isosx
-except:
- iswindows = sys.platform.startswith('win')
- isosx = sys.platform.startswith('darwin')
-
-def unicode_argv():
- if iswindows:
- # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
- # strings.
-
- # Versions 2.x of Python don't support Unicode in sys.argv on
- # Windows, with the underlying Windows API instead replacing multi-byte
- # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv
- # as a list of Unicode strings and encode them as utf-8
-
- from ctypes import POINTER, byref, cdll, c_int, windll
- from ctypes.wintypes import LPCWSTR, LPWSTR
+class DrmException(Exception):
+ pass
- GetCommandLineW = cdll.kernel32.GetCommandLineW
- GetCommandLineW.argtypes = []
- GetCommandLineW.restype = LPCWSTR
+global charMap1
+global charMap3
+global charMap4
- CommandLineToArgvW = windll.shell32.CommandLineToArgvW
- CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
- CommandLineToArgvW.restype = POINTER(LPWSTR)
- cmd = GetCommandLineW()
- argc = c_int(0)
- argv = CommandLineToArgvW(cmd, byref(argc))
- if argc.value > 0:
- # Remove Python executable and commands if present
- start = argc.value - len(sys.argv)
- return [argv[i] for i in
- xrange(start, argc.value)]
- # if we don't have any arguments at all, just pass back script name
- # this should never happen
- return [u"kindlekey.py"]
- else:
- argvencoding = sys.stdin.encoding
- if argvencoding == None:
- argvencoding = "utf-8"
- return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
-
-class DrmException(Exception):
- pass
+charMap1 = 'n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M'
+charMap3 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+charMap4 = 'ABCDEFGHIJKLMNPQRSTUVWXYZ123456789'
# crypto digestroutines
import hashlib
@@ -112,31 +35,6 @@ def SHA1(message):
ctx.update(message)
return ctx.digest()
-def SHA256(message):
- ctx = hashlib.sha256()
- ctx.update(message)
- return ctx.digest()
-
-# For K4M/PC 1.6.X and later
-# generate table of prime number less than or equal to int n
-def primes(n):
- if n==2: return [2]
- elif n<2: return []
- s=range(3,n+1,2)
- mroot = n ** 0.5
- half=(n+1)/2-1
- i=0
- m=3
- while m <= mroot:
- if s[i]:
- j=(m*m-3)/2
- s[j]=0
- while j<half:
- s[j]=0
- j+=m
- i=i+1
- m=2*i+3
- return [2]+[x for x in s if x]
# Encode the bytes in data with the characters in map
def encode(data, map):
@@ -165,1754 +63,205 @@ def decode(data,map):
result += pack('B',value)
return result
-# Routines unique to Mac and PC
-if iswindows:
- from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \
- create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \
- string_at, Structure, c_void_p, cast
+#
+# PID generation routines
+#
- import _winreg as winreg
- MAX_PATH = 255
- kernel32 = windll.kernel32
- advapi32 = windll.advapi32
- crypt32 = windll.crypt32
+# Returns two bit at offset from a bit field
+def getTwoBitsFromBitField(bitField,offset):
+ byteNumber = offset // 4
+ bitPosition = 6 - 2*(offset % 4)
+ return ord(bitField[byteNumber]) >> bitPosition & 3
+
+# Returns the six bits at offset from a bit field
+def getSixBitsFromBitField(bitField,offset):
+ offset *= 3
+ value = (getTwoBitsFromBitField(bitField,offset) <<4) + (getTwoBitsFromBitField(bitField,offset+1) << 2) +getTwoBitsFromBitField(bitField,offset+2)
+ return value
+
+# 8 bits to six bits encoding from hash to generate PID string
+def encodePID(hash):
+ global charMap3
+ PID = ''
+ for position in range (0,8):
+ PID += charMap3[getSixBitsFromBitField(hash,position)]
+ return PID
+
+# Encryption table used to generate the device PID
+def generatePidEncryptionTable() :
+ table = []
+ for counter1 in range (0,0x100):
+ value = counter1
+ for counter2 in range (0,8):
+ if (value & 1 == 0) :
+ value = value >> 1
+ else :
+ value = value >> 1
+ value = value ^ 0xEDB88320
+ table.append(value)
+ return table
+
+# Seed value used to generate the device PID
+def generatePidSeed(table,dsn) :
+ value = 0
+ for counter in range (0,4) :
+ index = (ord(dsn[counter]) ^ value) &0xFF
+ value = (value >> 8) ^ table[index]
+ return value
+
+# Generate the device PID
+def generateDevicePID(table,dsn,nbRoll):
+ global charMap4
+ seed = generatePidSeed(table,dsn)
+ pidAscii = ''
+ pid = [(seed >>24) &0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF,(seed>>24) & 0xFF,(seed >> 16) &0xff,(seed >> 8) &0xFF,(seed) & 0xFF]
+ index = 0
+ for counter in range (0,nbRoll):
+ pid[index] = pid[index] ^ ord(dsn[counter])
+ index = (index+1) %8
+ for counter in range (0,8):
+ index = ((((pid[counter] >>5) & 3) ^ pid[counter]) & 0x1f) + (pid[counter] >> 7)
+ pidAscii += charMap4[index]
+ return pidAscii
+
+def crc32(s):
+ return (~binascii.crc32(s,-1))&0xFFFFFFFF
+
+# convert from 8 digit PID to 10 digit PID with checksum
+def checksumPid(s):
+ global charMap4
+ crc = crc32(s)
+ crc = crc ^ (crc >> 16)
+ res = s
+ l = len(charMap4)
+ for i in (0,1):
+ b = crc & 0xff
+ pos = (b // l) ^ (b % l)
+ res += charMap4[pos%l]
+ crc >>= 8
+ return res
+
+
+# old kindle serial number to fixed pid
+def pidFromSerial(s, l):
+ global charMap4
+ crc = crc32(s)
+ arr1 = [0]*l
+ for i in xrange(len(s)):
+ arr1[i%l] ^= ord(s[i])
+ crc_bytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff]
+ for i in xrange(l):
+ arr1[i] ^= crc_bytes[i&3]
+ pid = ""
+ for i in xrange(l):
+ b = arr1[i] & 0xff
+ pid+=charMap4[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))]
+ return pid
+
+
+# Parse the EXTH header records and use the Kindle serial number to calculate the book pid.
+def getKindlePids(rec209, token, serialnum):
+ pids=[]
+
+ if isinstance(serialnum,unicode):
+ serialnum = serialnum.encode('ascii')
+
+ # Compute book PID
+ pidHash = SHA1(serialnum+rec209+token)
+ bookPID = encodePID(pidHash)
+ bookPID = checksumPid(bookPID)
+ pids.append(bookPID)
+
+ # compute fixed pid for old pre 2.5 firmware update pid as well
+ kindlePID = pidFromSerial(serialnum, 7) + "*"
+ kindlePID = checksumPid(kindlePID)
+ pids.append(kindlePID)
+
+ return pids
+
+
+# parse the Kindleinfo file to calculate the book pid.
+
+keynames = ['kindle.account.tokens','kindle.cookie.item','eulaVersionAccepted','login_date','kindle.token.item','login','kindle.key.item','kindle.name.info','kindle.device.info', 'MazamaRandomNumber']
+
+def getK4Pids(rec209, token, kindleDatabase):
+ global charMap1
+ pids = []
try:
- # try to get fast routines from alfcrypto
- from alfcrypto import AES_CBC, KeyIVGen
- except:
- # alfcrypto not available, so use python implementations
- """
- Routines for doing AES CBC in one file
-
- Modified by some_updates to extract
- and combine only those parts needed for AES CBC
- into one simple to add python file
-
- Original Version
- Copyright (c) 2002 by Paul A. Lambert
- Under:
- CryptoPy Artisitic License Version 1.0
- See the wonderful pure python package cryptopy-1.2.5
- and read its LICENSE.txt for complete license details.
- """
-
- class CryptoError(Exception):
- """ Base class for crypto exceptions """
- def __init__(self,errorMessage='Error!'):
- self.message = errorMessage
- def __str__(self):
- return self.message
-
- class InitCryptoError(CryptoError):
- """ Crypto errors during algorithm initialization """
- class BadKeySizeError(InitCryptoError):
- """ Bad key size error """
- class EncryptError(CryptoError):
- """ Error in encryption processing """
- class DecryptError(CryptoError):
- """ Error in decryption processing """
- class DecryptNotBlockAlignedError(DecryptError):
- """ Error in decryption processing """
-
- def xorS(a,b):
- """ XOR two strings """
- assert len(a)==len(b)
- x = []
- for i in range(len(a)):
- x.append( chr(ord(a[i])^ord(b[i])))
- return ''.join(x)
-
- def xor(a,b):
- """ XOR two strings """
- x = []
- for i in range(min(len(a),len(b))):
- x.append( chr(ord(a[i])^ord(b[i])))
- return ''.join(x)
-
- """
- Base 'BlockCipher' and Pad classes for cipher instances.
- BlockCipher supports automatic padding and type conversion. The BlockCipher
- class was written to make the actual algorithm code more readable and
- not for performance.
- """
-
- class BlockCipher:
- """ Block ciphers """
- def __init__(self):
- self.reset()
-
- def reset(self):
- self.resetEncrypt()
- self.resetDecrypt()
- def resetEncrypt(self):
- self.encryptBlockCount = 0
- self.bytesToEncrypt = ''
- def resetDecrypt(self):
- self.decryptBlockCount = 0
- self.bytesToDecrypt = ''
-
- def encrypt(self, plainText, more = None):
- """ Encrypt a string and return a binary string """
- self.bytesToEncrypt += plainText # append plainText to any bytes from prior encrypt
- numBlocks, numExtraBytes = divmod(len(self.bytesToEncrypt), self.blockSize)
- cipherText = ''
- for i in range(numBlocks):
- bStart = i*self.blockSize
- ctBlock = self.encryptBlock(self.bytesToEncrypt[bStart:bStart+self.blockSize])
- self.encryptBlockCount += 1
- cipherText += ctBlock
- if numExtraBytes > 0: # save any bytes that are not block aligned
- self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:]
- else:
- self.bytesToEncrypt = ''
-
- if more == None: # no more data expected from caller
- finalBytes = self.padding.addPad(self.bytesToEncrypt,self.blockSize)
- if len(finalBytes) > 0:
- ctBlock = self.encryptBlock(finalBytes)
- self.encryptBlockCount += 1
- cipherText += ctBlock
- self.resetEncrypt()
- return cipherText
-
- def decrypt(self, cipherText, more = None):
- """ Decrypt a string and return a string """
- self.bytesToDecrypt += cipherText # append to any bytes from prior decrypt
-
- numBlocks, numExtraBytes = divmod(len(self.bytesToDecrypt), self.blockSize)
- if more == None: # no more calls to decrypt, should have all the data
- if numExtraBytes != 0:
- raise DecryptNotBlockAlignedError, 'Data not block aligned on decrypt'
-
- # hold back some bytes in case last decrypt has zero len
- if (more != None) and (numExtraBytes == 0) and (numBlocks >0) :
- numBlocks -= 1
- numExtraBytes = self.blockSize
-
- plainText = ''
- for i in range(numBlocks):
- bStart = i*self.blockSize
- ptBlock = self.decryptBlock(self.bytesToDecrypt[bStart : bStart+self.blockSize])
- self.decryptBlockCount += 1
- plainText += ptBlock
-
- if numExtraBytes > 0: # save any bytes that are not block aligned
- self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:]
- else:
- self.bytesToEncrypt = ''
-
- if more == None: # last decrypt remove padding
- plainText = self.padding.removePad(plainText, self.blockSize)
- self.resetDecrypt()
- return plainText
-
-
- class Pad:
- def __init__(self):
- pass # eventually could put in calculation of min and max size extension
-
- class padWithPadLen(Pad):
- """ Pad a binary string with the length of the padding """
-
- def addPad(self, extraBytes, blockSize):
- """ Add padding to a binary string to make it an even multiple
- of the block size """
- blocks, numExtraBytes = divmod(len(extraBytes), blockSize)
- padLength = blockSize - numExtraBytes
- return extraBytes + padLength*chr(padLength)
-
- def removePad(self, paddedBinaryString, blockSize):
- """ Remove padding from a binary string """
- if not(0<len(paddedBinaryString)):
- raise DecryptNotBlockAlignedError, 'Expected More Data'
- return paddedBinaryString[:-ord(paddedBinaryString[-1])]
-
- class noPadding(Pad):
- """ No padding. Use this to get ECB behavior from encrypt/decrypt """
-
- def addPad(self, extraBytes, blockSize):
- """ Add no padding """
- return extraBytes
-
- def removePad(self, paddedBinaryString, blockSize):
- """ Remove no padding """
- return paddedBinaryString
-
- """
- Rijndael encryption algorithm
- This byte oriented implementation is intended to closely
- match FIPS specification for readability. It is not implemented
- for performance.
- """
-
- class Rijndael(BlockCipher):
- """ Rijndael encryption algorithm """
- def __init__(self, key = None, padding = padWithPadLen(), keySize=16, blockSize=16 ):
- self.name = 'RIJNDAEL'
- self.keySize = keySize
- self.strength = keySize*8
- self.blockSize = blockSize # blockSize is in bytes
- self.padding = padding # change default to noPadding() to get normal ECB behavior
-
- assert( keySize%4==0 and NrTable[4].has_key(keySize/4)),'key size must be 16,20,24,29 or 32 bytes'
- assert( blockSize%4==0 and NrTable.has_key(blockSize/4)), 'block size must be 16,20,24,29 or 32 bytes'
-
- self.Nb = self.blockSize/4 # Nb is number of columns of 32 bit words
- self.Nk = keySize/4 # Nk is the key length in 32-bit words
- self.Nr = NrTable[self.Nb][self.Nk] # The number of rounds (Nr) is a function of
- # the block (Nb) and key (Nk) sizes.
- if key != None:
- self.setKey(key)
-
- def setKey(self, key):
- """ Set a key and generate the expanded key """
- assert( len(key) == (self.Nk*4) ), 'Key length must be same as keySize parameter'
- self.__expandedKey = keyExpansion(self, key)
- self.reset() # BlockCipher.reset()
-
- def encryptBlock(self, plainTextBlock):
- """ Encrypt a block, plainTextBlock must be a array of bytes [Nb by 4] """
- self.state = self._toBlock(plainTextBlock)
- AddRoundKey(self, self.__expandedKey[0:self.Nb])
- for round in range(1,self.Nr): #for round = 1 step 1 to Nr
- SubBytes(self)
- ShiftRows(self)
- MixColumns(self)
- AddRoundKey(self, self.__expandedKey[round*self.Nb:(round+1)*self.Nb])
- SubBytes(self)
- ShiftRows(self)
- AddRoundKey(self, self.__expandedKey[self.Nr*self.Nb:(self.Nr+1)*self.Nb])
- return self._toBString(self.state)
-
-
- def decryptBlock(self, encryptedBlock):
- """ decrypt a block (array of bytes) """
- self.state = self._toBlock(encryptedBlock)
- AddRoundKey(self, self.__expandedKey[self.Nr*self.Nb:(self.Nr+1)*self.Nb])
- for round in range(self.Nr-1,0,-1):
- InvShiftRows(self)
- InvSubBytes(self)
- AddRoundKey(self, self.__expandedKey[round*self.Nb:(round+1)*self.Nb])
- InvMixColumns(self)
- InvShiftRows(self)
- InvSubBytes(self)
- AddRoundKey(self, self.__expandedKey[0:self.Nb])
- return self._toBString(self.state)
-
- def _toBlock(self, bs):
- """ Convert binary string to array of bytes, state[col][row]"""
- assert ( len(bs) == 4*self.Nb ), 'Rijndarl blocks must be of size blockSize'
- return [[ord(bs[4*i]),ord(bs[4*i+1]),ord(bs[4*i+2]),ord(bs[4*i+3])] for i in range(self.Nb)]
-
- def _toBString(self, block):
- """ Convert block (array of bytes) to binary string """
- l = []
- for col in block:
- for rowElement in col:
- l.append(chr(rowElement))
- return ''.join(l)
- #-------------------------------------
- """ Number of rounds Nr = NrTable[Nb][Nk]
-
- Nb Nk=4 Nk=5 Nk=6 Nk=7 Nk=8
- ------------------------------------- """
- NrTable = {4: {4:10, 5:11, 6:12, 7:13, 8:14},
- 5: {4:11, 5:11, 6:12, 7:13, 8:14},
- 6: {4:12, 5:12, 6:12, 7:13, 8:14},
- 7: {4:13, 5:13, 6:13, 7:13, 8:14},
- 8: {4:14, 5:14, 6:14, 7:14, 8:14}}
- #-------------------------------------
- def keyExpansion(algInstance, keyString):
- """ Expand a string of size keySize into a larger array """
- Nk, Nb, Nr = algInstance.Nk, algInstance.Nb, algInstance.Nr # for readability
- key = [ord(byte) for byte in keyString] # convert string to list
- w = [[key[4*i],key[4*i+1],key[4*i+2],key[4*i+3]] for i in range(Nk)]
- for i in range(Nk,Nb*(Nr+1)):
- temp = w[i-1] # a four byte column
- if (i%Nk) == 0 :
- temp = temp[1:]+[temp[0]] # RotWord(temp)
- temp = [ Sbox[byte] for byte in temp ]
- temp[0] ^= Rcon[i/Nk]
- elif Nk > 6 and i%Nk == 4 :
- temp = [ Sbox[byte] for byte in temp ] # SubWord(temp)
- w.append( [ w[i-Nk][byte]^temp[byte] for byte in range(4) ] )
- return w
-
- Rcon = (0,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36, # note extra '0' !!!
- 0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,
- 0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91)
-
- #-------------------------------------
- def AddRoundKey(algInstance, keyBlock):
- """ XOR the algorithm state with a block of key material """
- for column in range(algInstance.Nb):
- for row in range(4):
- algInstance.state[column][row] ^= keyBlock[column][row]
- #-------------------------------------
-
- def SubBytes(algInstance):
- for column in range(algInstance.Nb):
- for row in range(4):
- algInstance.state[column][row] = Sbox[algInstance.state[column][row]]
-
- def InvSubBytes(algInstance):
- for column in range(algInstance.Nb):
- for row in range(4):
- algInstance.state[column][row] = InvSbox[algInstance.state[column][row]]
-
- Sbox = (0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,
- 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
- 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,
- 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
- 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,
- 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15,
- 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,
- 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75,
- 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,
- 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84,
- 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,
- 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf,
- 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,
- 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8,
- 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,
- 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2,
- 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,
- 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73,
- 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,
- 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb,
- 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,
- 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79,
- 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,
- 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08,
- 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,
- 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a,
- 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,
- 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e,
- 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,
- 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf,
- 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,
- 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16)
-
- InvSbox = (0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
- 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
- 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
- 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
- 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
- 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
- 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
- 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
- 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
- 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
- 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
- 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
- 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
- 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
- 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
- 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
- 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
- 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
- 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
- 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
- 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
- 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
- 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
- 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
- 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
- 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
- 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
- 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
- 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
- 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
- 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
- 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d)
-
- #-------------------------------------
- """ For each block size (Nb), the ShiftRow operation shifts row i
- by the amount Ci. Note that row 0 is not shifted.
- Nb C1 C2 C3
- ------------------- """
- shiftOffset = { 4 : ( 0, 1, 2, 3),
- 5 : ( 0, 1, 2, 3),
- 6 : ( 0, 1, 2, 3),
- 7 : ( 0, 1, 2, 4),
- 8 : ( 0, 1, 3, 4) }
- def ShiftRows(algInstance):
- tmp = [0]*algInstance.Nb # list of size Nb
- for r in range(1,4): # row 0 reamains unchanged and can be skipped
- for c in range(algInstance.Nb):
- tmp[c] = algInstance.state[(c+shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r]
- for c in range(algInstance.Nb):
- algInstance.state[c][r] = tmp[c]
- def InvShiftRows(algInstance):
- tmp = [0]*algInstance.Nb # list of size Nb
- for r in range(1,4): # row 0 reamains unchanged and can be skipped
- for c in range(algInstance.Nb):
- tmp[c] = algInstance.state[(c+algInstance.Nb-shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r]
- for c in range(algInstance.Nb):
- algInstance.state[c][r] = tmp[c]
- #-------------------------------------
- def MixColumns(a):
- Sprime = [0,0,0,0]
- for j in range(a.Nb): # for each column
- Sprime[0] = mul(2,a.state[j][0])^mul(3,a.state[j][1])^mul(1,a.state[j][2])^mul(1,a.state[j][3])
- Sprime[1] = mul(1,a.state[j][0])^mul(2,a.state[j][1])^mul(3,a.state[j][2])^mul(1,a.state[j][3])
- Sprime[2] = mul(1,a.state[j][0])^mul(1,a.state[j][1])^mul(2,a.state[j][2])^mul(3,a.state[j][3])
- Sprime[3] = mul(3,a.state[j][0])^mul(1,a.state[j][1])^mul(1,a.state[j][2])^mul(2,a.state[j][3])
- for i in range(4):
- a.state[j][i] = Sprime[i]
-
- def InvMixColumns(a):
- """ Mix the four bytes of every column in a linear way
- This is the opposite operation of Mixcolumn """
- Sprime = [0,0,0,0]
- for j in range(a.Nb): # for each column
- Sprime[0] = mul(0x0E,a.state[j][0])^mul(0x0B,a.state[j][1])^mul(0x0D,a.state[j][2])^mul(0x09,a.state[j][3])
- Sprime[1] = mul(0x09,a.state[j][0])^mul(0x0E,a.state[j][1])^mul(0x0B,a.state[j][2])^mul(0x0D,a.state[j][3])
- Sprime[2] = mul(0x0D,a.state[j][0])^mul(0x09,a.state[j][1])^mul(0x0E,a.state[j][2])^mul(0x0B,a.state[j][3])
- Sprime[3] = mul(0x0B,a.state[j][0])^mul(0x0D,a.state[j][1])^mul(0x09,a.state[j][2])^mul(0x0E,a.state[j][3])
- for i in range(4):
- a.state[j][i] = Sprime[i]
-
- #-------------------------------------
- def mul(a, b):
- """ Multiply two elements of GF(2^m)
- needed for MixColumn and InvMixColumn """
- if (a !=0 and b!=0):
- return Alogtable[(Logtable[a] + Logtable[b])%255]
- else:
- return 0
-
- Logtable = ( 0, 0, 25, 1, 50, 2, 26, 198, 75, 199, 27, 104, 51, 238, 223, 3,
- 100, 4, 224, 14, 52, 141, 129, 239, 76, 113, 8, 200, 248, 105, 28, 193,
- 125, 194, 29, 181, 249, 185, 39, 106, 77, 228, 166, 114, 154, 201, 9, 120,
- 101, 47, 138, 5, 33, 15, 225, 36, 18, 240, 130, 69, 53, 147, 218, 142,
- 150, 143, 219, 189, 54, 208, 206, 148, 19, 92, 210, 241, 64, 70, 131, 56,
- 102, 221, 253, 48, 191, 6, 139, 98, 179, 37, 226, 152, 34, 136, 145, 16,
- 126, 110, 72, 195, 163, 182, 30, 66, 58, 107, 40, 84, 250, 133, 61, 186,
- 43, 121, 10, 21, 155, 159, 94, 202, 78, 212, 172, 229, 243, 115, 167, 87,
- 175, 88, 168, 80, 244, 234, 214, 116, 79, 174, 233, 213, 231, 230, 173, 232,
- 44, 215, 117, 122, 235, 22, 11, 245, 89, 203, 95, 176, 156, 169, 81, 160,
- 127, 12, 246, 111, 23, 196, 73, 236, 216, 67, 31, 45, 164, 118, 123, 183,
- 204, 187, 62, 90, 251, 96, 177, 134, 59, 82, 161, 108, 170, 85, 41, 157,
- 151, 178, 135, 144, 97, 190, 220, 252, 188, 149, 207, 205, 55, 63, 91, 209,
- 83, 57, 132, 60, 65, 162, 109, 71, 20, 42, 158, 93, 86, 242, 211, 171,
- 68, 17, 146, 217, 35, 32, 46, 137, 180, 124, 184, 38, 119, 153, 227, 165,
- 103, 74, 237, 222, 197, 49, 254, 24, 13, 99, 140, 128, 192, 247, 112, 7)
-
- Alogtable= ( 1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53,
- 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170,
- 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49,
- 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205,
- 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136,
- 131, 158, 185, 208, 107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154,
- 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163,
- 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160,
- 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65,
- 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117,
- 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128,
- 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84,
- 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202,
- 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14,
- 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23,
- 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246, 1)
-
-
-
-
- """
- AES Encryption Algorithm
- The AES algorithm is just Rijndael algorithm restricted to the default
- blockSize of 128 bits.
- """
-
- class AES(Rijndael):
- """ The AES algorithm is the Rijndael block cipher restricted to block
- sizes of 128 bits and key sizes of 128, 192 or 256 bits
- """
- def __init__(self, key = None, padding = padWithPadLen(), keySize=16):
- """ Initialize AES, keySize is in bytes """
- if not (keySize == 16 or keySize == 24 or keySize == 32) :
- raise BadKeySizeError, 'Illegal AES key size, must be 16, 24, or 32 bytes'
-
- Rijndael.__init__( self, key, padding=padding, keySize=keySize, blockSize=16 )
-
- self.name = 'AES'
-
-
- """
- CBC mode of encryption for block ciphers.
- This algorithm mode wraps any BlockCipher to make a
- Cipher Block Chaining mode.
- """
- from random import Random # should change to crypto.random!!!
-
-
- class CBC(BlockCipher):
- """ The CBC class wraps block ciphers to make cipher block chaining (CBC) mode
- algorithms. The initialization (IV) is automatic if set to None. Padding
- is also automatic based on the Pad class used to initialize the algorithm
- """
- def __init__(self, blockCipherInstance, padding = padWithPadLen()):
- """ CBC algorithms are created by initializing with a BlockCipher instance """
- self.baseCipher = blockCipherInstance
- self.name = self.baseCipher.name + '_CBC'
- self.blockSize = self.baseCipher.blockSize
- self.keySize = self.baseCipher.keySize
- self.padding = padding
- self.baseCipher.padding = noPadding() # baseCipher should NOT pad!!
- self.r = Random() # for IV generation, currently uses
- # mediocre standard distro version <----------------
- import time
- newSeed = time.ctime()+str(self.r) # seed with instance location
- self.r.seed(newSeed) # to make unique
- self.reset()
-
- def setKey(self, key):
- self.baseCipher.setKey(key)
-
- # Overload to reset both CBC state and the wrapped baseCipher
- def resetEncrypt(self):
- BlockCipher.resetEncrypt(self) # reset CBC encrypt state (super class)
- self.baseCipher.resetEncrypt() # reset base cipher encrypt state
-
- def resetDecrypt(self):
- BlockCipher.resetDecrypt(self) # reset CBC state (super class)
- self.baseCipher.resetDecrypt() # reset base cipher decrypt state
-
- def encrypt(self, plainText, iv=None, more=None):
- """ CBC encryption - overloads baseCipher to allow optional explicit IV
- when iv=None, iv is auto generated!
- """
- if self.encryptBlockCount == 0:
- self.iv = iv
- else:
- assert(iv==None), 'IV used only on first call to encrypt'
-
- return BlockCipher.encrypt(self,plainText, more=more)
-
- def decrypt(self, cipherText, iv=None, more=None):
- """ CBC decryption - overloads baseCipher to allow optional explicit IV
- when iv=None, iv is auto generated!
- """
- if self.decryptBlockCount == 0:
- self.iv = iv
- else:
- assert(iv==None), 'IV used only on first call to decrypt'
-
- return BlockCipher.decrypt(self, cipherText, more=more)
-
- def encryptBlock(self, plainTextBlock):
- """ CBC block encryption, IV is set with 'encrypt' """
- auto_IV = ''
- if self.encryptBlockCount == 0:
- if self.iv == None:
- # generate IV and use
- self.iv = ''.join([chr(self.r.randrange(256)) for i in range(self.blockSize)])
- self.prior_encr_CT_block = self.iv
- auto_IV = self.prior_encr_CT_block # prepend IV if it's automatic
- else: # application provided IV
- assert(len(self.iv) == self.blockSize ),'IV must be same length as block'
- self.prior_encr_CT_block = self.iv
- """ encrypt the prior CT XORed with the PT """
- ct = self.baseCipher.encryptBlock( xor(self.prior_encr_CT_block, plainTextBlock) )
- self.prior_encr_CT_block = ct
- return auto_IV+ct
-
- def decryptBlock(self, encryptedBlock):
- """ Decrypt a single block """
-
- if self.decryptBlockCount == 0: # first call, process IV
- if self.iv == None: # auto decrypt IV?
- self.prior_CT_block = encryptedBlock
- return ''
- else:
- assert(len(self.iv)==self.blockSize),"Bad IV size on CBC decryption"
- self.prior_CT_block = self.iv
-
- dct = self.baseCipher.decryptBlock(encryptedBlock)
- """ XOR the prior decrypted CT with the prior CT """
- dct_XOR_priorCT = xor( self.prior_CT_block, dct )
-
- self.prior_CT_block = encryptedBlock
-
- return dct_XOR_priorCT
-
-
- """
- AES_CBC Encryption Algorithm
- """
-
- class aescbc_AES_CBC(CBC):
- """ AES encryption in CBC feedback mode """
- def __init__(self, key=None, padding=padWithPadLen(), keySize=16):
- CBC.__init__( self, AES(key, noPadding(), keySize), padding)
- self.name = 'AES_CBC'
-
- class AES_CBC(object):
- def __init__(self):
- self._key = None
- self._iv = None
- self.aes = None
-
- def set_decrypt_key(self, userkey, iv):
- self._key = userkey
- self._iv = iv
- self.aes = aescbc_AES_CBC(userkey, noPadding(), len(userkey))
-
- def decrypt(self, data):
- iv = self._iv
- cleartext = self.aes.decrypt(iv + data)
- return cleartext
-
- import hmac
-
- class KeyIVGen(object):
- # this only exists in openssl so we will use pure python implementation instead
- # PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1',
- # [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p])
- def pbkdf2(self, passwd, salt, iter, keylen):
-
- def xorstr( a, b ):
- if len(a) != len(b):
- raise Exception("xorstr(): lengths differ")
- return ''.join((chr(ord(x)^ord(y)) for x, y in zip(a, b)))
-
- def prf( h, data ):
- hm = h.copy()
- hm.update( data )
- return hm.digest()
-
- def pbkdf2_F( h, salt, itercount, blocknum ):
- U = prf( h, salt + pack('>i',blocknum ) )
- T = U
- for i in range(2, itercount+1):
- U = prf( h, U )
- T = xorstr( T, U )
- return T
-
- sha = hashlib.sha1
- digest_size = sha().digest_size
- # l - number of output blocks to produce
- l = keylen / digest_size
- if keylen % digest_size != 0:
- l += 1
- h = hmac.new( passwd, None, sha )
- T = ""
- for i in range(1, l+1):
- T += pbkdf2_F( h, salt, iter, i )
- return T[0: keylen]
-
- def UnprotectHeaderData(encryptedData):
- passwdData = 'header_key_data'
- salt = 'HEADER.2011'
- iter = 0x80
- keylen = 0x100
- key_iv = KeyIVGen().pbkdf2(passwdData, salt, iter, keylen)
- key = key_iv[0:32]
- iv = key_iv[32:48]
- aes=AES_CBC()
- aes.set_decrypt_key(key, iv)
- cleartext = aes.decrypt(encryptedData)
- return cleartext
-
- # Various character maps used to decrypt kindle info values.
- # Probably supposed to act as obfuscation
- charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_"
- charMap5 = "AzB0bYyCeVvaZ3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_c1XxDdW2wE"
- # New maps in K4PC 1.9.0
- testMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
- testMap6 = "9YzAb0Cd1Ef2n5Pr6St7Uvh3Jk4M8WxG"
- testMap8 = "YvaZ3FfUm9Nn_c1XuG4yCAzB0beVg-TtHh5SsIiR6rJjQdW2wEq7KkPpL8lOoMxD"
-
- # interface with Windows OS Routines
- class DataBlob(Structure):
- _fields_ = [('cbData', c_uint),
- ('pbData', c_void_p)]
- DataBlob_p = POINTER(DataBlob)
-
-
- def GetSystemDirectory():
- GetSystemDirectoryW = kernel32.GetSystemDirectoryW
- GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint]
- GetSystemDirectoryW.restype = c_uint
- def GetSystemDirectory():
- buffer = create_unicode_buffer(MAX_PATH + 1)
- GetSystemDirectoryW(buffer, len(buffer))
- return buffer.value
- return GetSystemDirectory
- GetSystemDirectory = GetSystemDirectory()
-
- def GetVolumeSerialNumber():
- GetVolumeInformationW = kernel32.GetVolumeInformationW
- GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint,
- POINTER(c_uint), POINTER(c_uint),
- POINTER(c_uint), c_wchar_p, c_uint]
- GetVolumeInformationW.restype = c_uint
- def GetVolumeSerialNumber(path = GetSystemDirectory().split('\\')[0] + '\\'):
- vsn = c_uint(0)
- GetVolumeInformationW(path, None, 0, byref(vsn), None, None, None, 0)
- return str(vsn.value)
- return GetVolumeSerialNumber
- GetVolumeSerialNumber = GetVolumeSerialNumber()
-
- def GetIDString():
- vsn = GetVolumeSerialNumber()
- #print('Using Volume Serial Number for ID: '+vsn)
- return vsn
-
- def getLastError():
- GetLastError = kernel32.GetLastError
- GetLastError.argtypes = None
- GetLastError.restype = c_uint
- def getLastError():
- return GetLastError()
- return getLastError
- getLastError = getLastError()
+ # Get the Mazama Random number
+ MazamaRandomNumber = (kindleDatabase[1])['MazamaRandomNumber'].decode('hex').encode('ascii')
- def GetUserName():
- GetUserNameW = advapi32.GetUserNameW
- GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)]
- GetUserNameW.restype = c_uint
- def GetUserName():
- buffer = create_unicode_buffer(2)
- size = c_uint(len(buffer))
- while not GetUserNameW(buffer, byref(size)):
- errcd = getLastError()
- if errcd == 234:
- # bad wine implementation up through wine 1.3.21
- return "AlternateUserName"
- buffer = create_unicode_buffer(len(buffer) * 2)
- size.value = len(buffer)
- return buffer.value.encode('utf-16-le')[::2]
- return GetUserName
- GetUserName = GetUserName()
+ # Get the kindle account token
+ kindleAccountToken = (kindleDatabase[1])['kindle.account.tokens'].decode('hex').encode('ascii')
- def CryptUnprotectData():
- _CryptUnprotectData = crypt32.CryptUnprotectData
- _CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p,
- c_void_p, c_void_p, c_uint, DataBlob_p]
- _CryptUnprotectData.restype = c_uint
- def CryptUnprotectData(indata, entropy, flags):
- indatab = create_string_buffer(indata)
- indata = DataBlob(len(indata), cast(indatab, c_void_p))
- entropyb = create_string_buffer(entropy)
- entropy = DataBlob(len(entropy), cast(entropyb, c_void_p))
- outdata = DataBlob()
- if not _CryptUnprotectData(byref(indata), None, byref(entropy),
- None, None, flags, byref(outdata)):
- # raise DrmException("Failed to Unprotect Data")
- return 'failed'
- return string_at(outdata.pbData, outdata.cbData)
- return CryptUnprotectData
- CryptUnprotectData = CryptUnprotectData()
+ # Get the IDString used to decode the Kindle Info file
+ IDString = (kindleDatabase[1])['IDString'].decode('hex').encode('ascii')
+ # Get the UserName stored when the Kindle Info file was decoded
+ UserName = (kindleDatabase[1])['UserName'].decode('hex').encode('ascii')
- # Locate all of the kindle-info style files and return as list
- def getKindleInfoFiles():
- kInfoFiles = []
- # some 64 bit machines do not have the proper registry key for some reason
- # or the pythonn interface to the 32 vs 64 bit registry is broken
- path = ""
- if 'LOCALAPPDATA' in os.environ.keys():
- path = os.environ['LOCALAPPDATA']
- else:
- # User Shell Folders show take precedent over Shell Folders if present
- try:
- regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\User Shell Folders\\")
- path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
- if not os.path.isdir(path):
- path = ""
- try:
- regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\")
- path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
- if not os.path.isdir(path):
- path = ""
- except RegError:
- pass
- except RegError:
- pass
+ except KeyError:
+ print u"Keys not found in the database {0}.".format(kindleDatabase[0])
+ return pids
- found = False
- if path == "":
- print ('Could not find the folder in which to look for kinfoFiles.')
- else:
- print('searching for kinfoFiles in ' + path)
+ # Get the ID string used
+ encodedIDString = encodeHash(IDString,charMap1)
- # look for (K4PC 1.9.0 and later) .kinf2011 file
- kinfopath = path +'\\Amazon\\Kindle\\storage\\.kinf2011'
- if os.path.isfile(kinfopath):
- found = True
- print('Found K4PC 1.9+ kinf2011 file: ' + kinfopath)
- kInfoFiles.append(kinfopath)
+ # Get the current user name
+ encodedUsername = encodeHash(UserName,charMap1)
- # look for (K4PC 1.6.0 and later) rainier.2.1.1.kinf file
- kinfopath = path +'\\Amazon\\Kindle\\storage\\rainier.2.1.1.kinf'
- if os.path.isfile(kinfopath):
- found = True
- print('Found K4PC 1.6-1.8 kinf file: ' + kinfopath)
- kInfoFiles.append(kinfopath)
+ # concat, hash and encode to calculate the DSN
+ DSN = encode(SHA1(MazamaRandomNumber+encodedIDString+encodedUsername),charMap1)
- # look for (K4PC 1.5.0 and later) rainier.2.1.1.kinf file
- kinfopath = path +'\\Amazon\\Kindle For PC\\storage\\rainier.2.1.1.kinf'
- if os.path.isfile(kinfopath):
- found = True
- print('Found K4PC 1.5 kinf file: ' + kinfopath)
- kInfoFiles.append(kinfopath)
+ # Compute the device PID (for which I can tell, is used for nothing).
+ table = generatePidEncryptionTable()
+ devicePID = generateDevicePID(table,DSN,4)
+ devicePID = checksumPid(devicePID)
+ pids.append(devicePID)
- # look for original (earlier than K4PC 1.5.0) kindle-info files
- kinfopath = path +'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info'
- if os.path.isfile(kinfopath):
- found = True
- print('Found K4PC kindle.info file: ' + kinfopath)
- kInfoFiles.append(kinfopath)
+ # Compute book PIDs
- if not found:
- print('No K4PC kindle.info/kinf/kinf2011 files have been found.')
- return kInfoFiles
+ # book pid
+ pidHash = SHA1(DSN+kindleAccountToken+rec209+token)
+ bookPID = encodePID(pidHash)
+ bookPID = checksumPid(bookPID)
+ pids.append(bookPID)
+ # variant 1
+ pidHash = SHA1(kindleAccountToken+rec209+token)
+ bookPID = encodePID(pidHash)
+ bookPID = checksumPid(bookPID)
+ pids.append(bookPID)
- # determine type of kindle info provided and return a
- # database of keynames and values
- def getDBfromFile(kInfoFile):
- names = ['kindle.account.tokens','kindle.cookie.item','eulaVersionAccepted','login_date','kindle.token.item','login','kindle.key.item','kindle.name.info','kindle.device.info', 'MazamaRandomNumber', 'max_date', 'SIGVERIF']
- DB = {}
- with open(kInfoFile, 'rb') as infoReader:
- hdr = infoReader.read(1)
- data = infoReader.read()
+ # variant 2
+ pidHash = SHA1(DSN+rec209+token)
+ bookPID = encodePID(pidHash)
+ bookPID = checksumPid(bookPID)
+ pids.append(bookPID)
- if data.find('{') != -1 :
- # older style kindle-info file
- items = data.split('{')
- for item in items:
- if item != '':
- keyhash, rawdata = item.split(':')
- keyname = "unknown"
- for name in names:
- if encodeHash(name,charMap2) == keyhash:
- keyname = name
- break
- if keyname == "unknown":
- keyname = keyhash
- encryptedValue = decode(rawdata,charMap2)
- DB[keyname] = CryptUnprotectData(encryptedValue, "", 0)
- elif hdr == '/':
- # else rainier-2-1-1 .kinf file
- # the .kinf file uses "/" to separate it into records
- # so remove the trailing "/" to make it easy to use split
- data = data[:-1]
- items = data.split('/')
+ return pids
- # loop through the item records until all are processed
- while len(items) > 0:
+def getPidList(md1, md2, serials=[], kDatabases=[]):
+ pidlst = []
- # get the first item record
- item = items.pop(0)
+ if kDatabases is None:
+ kDatabases = []
+ if serials is None:
+ serials = []
- # the first 32 chars of the first record of a group
- # is the MD5 hash of the key name encoded by charMap5
- keyhash = item[0:32]
-
- # the raw keyhash string is used to create entropy for the actual
- # CryptProtectData Blob that represents that keys contents
- entropy = SHA1(keyhash)
-
- # the remainder of the first record when decoded with charMap5
- # has the ':' split char followed by the string representation
- # of the number of records that follow
- # and make up the contents
- srcnt = decode(item[34:],charMap5)
- rcnt = int(srcnt)
-
- # read and store in rcnt records of data
- # that make up the contents value
- edlst = []
- for i in xrange(rcnt):
- item = items.pop(0)
- edlst.append(item)
-
- keyname = "unknown"
- for name in names:
- if encodeHash(name,charMap5) == keyhash:
- keyname = name
- break
- if keyname == "unknown":
- keyname = keyhash
- # the charMap5 encoded contents data has had a length
- # of chars (always odd) cut off of the front and moved
- # to the end to prevent decoding using charMap5 from
- # working properly, and thereby preventing the ensuing
- # CryptUnprotectData call from succeeding.
-
- # The offset into the charMap5 encoded contents seems to be:
- # len(contents)-largest prime number <= int(len(content)/3)
- # (in other words split "about" 2/3rds of the way through)
-
- # move first offsets chars to end to align for decode by charMap5
- encdata = "".join(edlst)
- contlen = len(encdata)
- noffset = contlen - primes(int(contlen/3))[-1]
-
- # now properly split and recombine
- # by moving noffset chars from the start of the
- # string to the end of the string
- pfx = encdata[0:noffset]
- encdata = encdata[noffset:]
- encdata = encdata + pfx
-
- # decode using Map5 to get the CryptProtect Data
- encryptedValue = decode(encdata,charMap5)
- DB[keyname] = CryptUnprotectData(encryptedValue, entropy, 1)
- else:
- # else newest .kinf2011 style .kinf file
- # the .kinf file uses "/" to separate it into records
- # so remove the trailing "/" to make it easy to use split
- # need to put back the first char read because it it part
- # of the added entropy blob
- data = hdr + data[:-1]
- items = data.split('/')
-
- # starts with and encoded and encrypted header blob
- headerblob = items.pop(0)
- encryptedValue = decode(headerblob, testMap1)
- cleartext = UnprotectHeaderData(encryptedValue)
- # now extract the pieces that form the added entropy
- pattern = re.compile(r'''\[Version:(\d+)\]\[Build:(\d+)\]\[Cksum:([^\]]+)\]\[Guid:([\{\}a-z0-9\-]+)\]''', re.IGNORECASE)
- for m in re.finditer(pattern, cleartext):
- added_entropy = m.group(2) + m.group(4)
-
-
- # loop through the item records until all are processed
- while len(items) > 0:
-
- # get the first item record
- item = items.pop(0)
-
- # the first 32 chars of the first record of a group
- # is the MD5 hash of the key name encoded by charMap5
- keyhash = item[0:32]
-
- # the sha1 of raw keyhash string is used to create entropy along
- # with the added entropy provided above from the headerblob
- entropy = SHA1(keyhash) + added_entropy
-
- # the remainder of the first record when decoded with charMap5
- # has the ':' split char followed by the string representation
- # of the number of records that follow
- # and make up the contents
- srcnt = decode(item[34:],charMap5)
- rcnt = int(srcnt)
-
- # read and store in rcnt records of data
- # that make up the contents value
- edlst = []
- for i in xrange(rcnt):
- item = items.pop(0)
- edlst.append(item)
-
- # key names now use the new testMap8 encoding
- keyname = "unknown"
- for name in names:
- if encodeHash(name,testMap8) == keyhash:
- keyname = name
- break
-
- # the testMap8 encoded contents data has had a length
- # of chars (always odd) cut off of the front and moved
- # to the end to prevent decoding using testMap8 from
- # working properly, and thereby preventing the ensuing
- # CryptUnprotectData call from succeeding.
-
- # The offset into the testMap8 encoded contents seems to be:
- # len(contents)-largest prime number <= int(len(content)/3)
- # (in other words split "about" 2/3rds of the way through)
-
- # move first offsets chars to end to align for decode by testMap8
- # by moving noffset chars from the start of the
- # string to the end of the string
- encdata = "".join(edlst)
- contlen = len(encdata)
- noffset = contlen - primes(int(contlen/3))[-1]
- pfx = encdata[0:noffset]
- encdata = encdata[noffset:]
- encdata = encdata + pfx
-
- # decode using new testMap8 to get the original CryptProtect Data
- encryptedValue = decode(encdata,testMap8)
- cleartext = CryptUnprotectData(encryptedValue, entropy, 1)
- DB[keyname] = cleartext
-
- if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB:
- print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(GetIDString(), GetUserName().decode("latin-1"))
- # store values used in decryption
- DB['IDString'] = GetIDString()
- DB['UserName'] = GetUserName()
- else:
- DB = {}
- return DB
-elif isosx:
- import copy
- import subprocess
-
- # interface to needed routines in openssl's libcrypto
- def _load_crypto_libcrypto():
- from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \
- Structure, c_ulong, create_string_buffer, addressof, string_at, cast
- from ctypes.util import find_library
-
- libcrypto = find_library('crypto')
- if libcrypto is None:
- raise DrmException(u"libcrypto not found")
- libcrypto = CDLL(libcrypto)
-
- # From OpenSSL's crypto aes header
- #
- # AES_ENCRYPT 1
- # AES_DECRYPT 0
- # AES_MAXNR 14 (in bytes)
- # AES_BLOCK_SIZE 16 (in bytes)
- #
- # struct aes_key_st {
- # unsigned long rd_key[4 *(AES_MAXNR + 1)];
- # int rounds;
- # };
- # typedef struct aes_key_st AES_KEY;
- #
- # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
- #
- # note: the ivec string, and output buffer are both mutable
- # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
- # const unsigned long length, const AES_KEY *key, unsigned char *ivec, const int enc);
-
- AES_MAXNR = 14
- c_char_pp = POINTER(c_char_p)
- c_int_p = POINTER(c_int)
-
- class AES_KEY(Structure):
- _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
- AES_KEY_p = POINTER(AES_KEY)
-
- def F(restype, name, argtypes):
- func = getattr(libcrypto, name)
- func.restype = restype
- func.argtypes = argtypes
- return func
-
- AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])
-
- AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])
-
- # From OpenSSL's Crypto evp/p5_crpt2.c
- #
- # int PKCS5_PBKDF2_HMAC_SHA1(const char *pass, int passlen,
- # const unsigned char *salt, int saltlen, int iter,
- # int keylen, unsigned char *out);
-
- PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1',
- [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p])
-
- class LibCrypto(object):
- def __init__(self):
- self._blocksize = 0
- self._keyctx = None
- self._iv = 0
-
- def set_decrypt_key(self, userkey, iv):
- self._blocksize = len(userkey)
- if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
- raise DrmException(u"AES improper key used")
- return
- keyctx = self._keyctx = AES_KEY()
- self._iv = iv
- self._userkey = userkey
- rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
- if rv < 0:
- raise DrmException(u"Failed to initialize AES key")
-
- def decrypt(self, data):
- out = create_string_buffer(len(data))
- mutable_iv = create_string_buffer(self._iv, len(self._iv))
- keyctx = self._keyctx
- rv = AES_cbc_encrypt(data, out, len(data), keyctx, mutable_iv, 0)
- if rv == 0:
- raise DrmException(u"AES decryption failed")
- return out.raw
-
- def keyivgen(self, passwd, salt, iter, keylen):
- saltlen = len(salt)
- passlen = len(passwd)
- out = create_string_buffer(keylen)
- rv = PKCS5_PBKDF2_HMAC_SHA1(passwd, passlen, salt, saltlen, iter, keylen, out)
- return out.raw
- return LibCrypto
-
- def _load_crypto():
- LibCrypto = None
+ for kDatabase in kDatabases:
try:
- LibCrypto = _load_crypto_libcrypto()
- except (ImportError, DrmException):
- pass
- return LibCrypto
-
- LibCrypto = _load_crypto()
-
- # Various character maps used to decrypt books. Probably supposed to act as obfuscation
- charMap1 = 'n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M'
- charMap2 = 'ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM'
-
- # For kinf approach of K4Mac 1.6.X or later
- # On K4PC charMap5 = 'AzB0bYyCeVvaZ3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_c1XxDdW2wE'
- # For Mac they seem to re-use charMap2 here
- charMap5 = charMap2
-
- # new in K4M 1.9.X
- testMap8 = 'YvaZ3FfUm9Nn_c1XuG4yCAzB0beVg-TtHh5SsIiR6rJjQdW2wEq7KkPpL8lOoMxD'
-
- # uses a sub process to get the Hard Drive Serial Number using ioreg
- # returns serial numbers of all internal hard drive drives
- def GetVolumesSerialNumbers():
- sernums = []
- sernum = os.getenv('MYSERIALNUMBER')
- if sernum != None:
- sernums.append(sernum.strip())
- cmdline = '/usr/sbin/ioreg -w 0 -r -c AppleAHCIDiskDriver'
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
- out1, out2 = p.communicate()
- reslst = out1.split('\n')
- cnt = len(reslst)
- bsdname = None
- sernum = None
- foundIt = False
- for j in xrange(cnt):
- resline = reslst[j]
- pp = resline.find('\"Serial Number\" = \"')
- if pp >= 0:
- sernum = resline[pp+19:-1]
- sernums.append(sernum.strip())
- return sernums
-
- def GetUserHomeAppSupKindleDirParitionName():
- home = os.getenv('HOME')
- dpath = home + '/Library'
- cmdline = '/sbin/mount'
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
- out1, out2 = p.communicate()
- reslst = out1.split('\n')
- cnt = len(reslst)
- disk = ''
- foundIt = False
- for j in xrange(cnt):
- resline = reslst[j]
- if resline.startswith('/dev'):
- (devpart, mpath) = resline.split(' on ')
- dpart = devpart[5:]
- pp = mpath.find('(')
- if pp >= 0:
- mpath = mpath[:pp-1]
- if dpath.startswith(mpath):
- disk = dpart
- return disk
-
- # uses a sub process to get the UUID of the specified disk partition using ioreg
- def GetDiskPartitionUUIDs(diskpart):
- uuids = []
- uuidnum = os.getenv('MYUUIDNUMBER')
- if uuidnum != None:
- uuids.append(strip(uuidnum))
- cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver'
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
- out1, out2 = p.communicate()
- reslst = out1.split('\n')
- cnt = len(reslst)
- bsdname = None
- uuidnum = None
- foundIt = False
- nest = 0
- uuidnest = -1
- partnest = -2
- for j in xrange(cnt):
- resline = reslst[j]
- if resline.find('{') >= 0:
- nest += 1
- if resline.find('}') >= 0:
- nest -= 1
- pp = resline.find('\"UUID\" = \"')
- if pp >= 0:
- uuidnum = resline[pp+10:-1]
- uuidnum = uuidnum.strip()
- uuidnest = nest
- if partnest == uuidnest and uuidnest > 0:
- foundIt = True
- break
- bb = resline.find('\"BSD Name\" = \"')
- if bb >= 0:
- bsdname = resline[bb+14:-1]
- bsdname = bsdname.strip()
- if (bsdname == diskpart):
- partnest = nest
- else :
- partnest = -2
- if partnest == uuidnest and partnest > 0:
- foundIt = True
- break
- if nest == 0:
- partnest = -2
- uuidnest = -1
- uuidnum = None
- bsdname = None
- if foundIt:
- uuids.append(uuidnum)
- return uuids
-
- def GetMACAddressesMunged():
- macnums = []
- macnum = os.getenv('MYMACNUM')
- if macnum != None:
- macnums.append(macnum)
- cmdline = '/sbin/ifconfig en0'
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
- out1, out2 = p.communicate()
- reslst = out1.split('\n')
- cnt = len(reslst)
- macnum = None
- foundIt = False
- for j in xrange(cnt):
- resline = reslst[j]
- pp = resline.find('ether ')
- if pp >= 0:
- macnum = resline[pp+6:-1]
- macnum = macnum.strip()
- # print 'original mac', macnum
- # now munge it up the way Kindle app does
- # by xoring it with 0xa5 and swapping elements 3 and 4
- maclst = macnum.split(':')
- n = len(maclst)
- if n != 6:
- fountIt = False
- break
- for i in range(6):
- maclst[i] = int('0x' + maclst[i], 0)
- mlst = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
- mlst[5] = maclst[5] ^ 0xa5
- mlst[4] = maclst[3] ^ 0xa5
- mlst[3] = maclst[4] ^ 0xa5
- mlst[2] = maclst[2] ^ 0xa5
- mlst[1] = maclst[1] ^ 0xa5
- mlst[0] = maclst[0] ^ 0xa5
- macnum = '%0.2x%0.2x%0.2x%0.2x%0.2x%0.2x' % (mlst[0], mlst[1], mlst[2], mlst[3], mlst[4], mlst[5])
- foundIt = True
- break
- if foundIt:
- macnums.append(macnum)
- return macnums
-
-
- # uses unix env to get username instead of using sysctlbyname
- def GetUserName():
- username = os.getenv('USER')
- return username
-
- def GetIDStrings():
- # Return all possible ID Strings
- strings = []
- strings.extend(GetMACAddressesMunged())
- strings.extend(GetVolumesSerialNumbers())
- diskpart = GetUserHomeAppSupKindleDirParitionName()
- strings.extend(GetDiskPartitionUUIDs(diskpart))
- strings.append('9999999999')
- #print strings
- return strings
-
-
- # implements an Pseudo Mac Version of Windows built-in Crypto routine
- # used by Kindle for Mac versions < 1.6.0
- class CryptUnprotectData(object):
- def __init__(self, IDString):
- sp = IDString + '!@#' + GetUserName()
- passwdData = encode(SHA256(sp),charMap1)
- salt = '16743'
- self.crp = LibCrypto()
- iter = 0x3e8
- keylen = 0x80
- key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen)
- self.key = key_iv[0:32]
- self.iv = key_iv[32:48]
- self.crp.set_decrypt_key(self.key, self.iv)
-
- def decrypt(self, encryptedData):
- cleartext = self.crp.decrypt(encryptedData)
- cleartext = decode(cleartext,charMap1)
- return cleartext
-
-
- # implements an Pseudo Mac Version of Windows built-in Crypto routine
- # used for Kindle for Mac Versions >= 1.6.0
- class CryptUnprotectDataV2(object):
- def __init__(self, IDString):
- sp = GetUserName() + ':&%:' + IDString
- passwdData = encode(SHA256(sp),charMap5)
- # salt generation as per the code
- salt = 0x0512981d * 2 * 1 * 1
- salt = str(salt) + GetUserName()
- salt = encode(salt,charMap5)
- self.crp = LibCrypto()
- iter = 0x800
- keylen = 0x400
- key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen)
- self.key = key_iv[0:32]
- self.iv = key_iv[32:48]
- self.crp.set_decrypt_key(self.key, self.iv)
-
- def decrypt(self, encryptedData):
- cleartext = self.crp.decrypt(encryptedData)
- cleartext = decode(cleartext, charMap5)
- return cleartext
-
-
- # unprotect the new header blob in .kinf2011
- # used in Kindle for Mac Version >= 1.9.0
- def UnprotectHeaderData(encryptedData):
- passwdData = 'header_key_data'
- salt = 'HEADER.2011'
- iter = 0x80
- keylen = 0x100
- crp = LibCrypto()
- key_iv = crp.keyivgen(passwdData, salt, iter, keylen)
- key = key_iv[0:32]
- iv = key_iv[32:48]
- crp.set_decrypt_key(key,iv)
- cleartext = crp.decrypt(encryptedData)
- return cleartext
-
+ pidlst.extend(getK4Pids(md1, md2, kDatabase))
+ except Exception, e:
+ print u"Error getting PIDs from database {0}: {1}".format(kDatabase[0],e.args[0])
+ traceback.print_exc()
- # implements an Pseudo Mac Version of Windows built-in Crypto routine
- # used for Kindle for Mac Versions >= 1.9.0
- class CryptUnprotectDataV3(object):
- def __init__(self, entropy, IDString):
- sp = GetUserName() + '+@#$%+' + IDString
- passwdData = encode(SHA256(sp),charMap2)
- salt = entropy
- self.crp = LibCrypto()
- iter = 0x800
- keylen = 0x400
- key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen)
- self.key = key_iv[0:32]
- self.iv = key_iv[32:48]
- self.crp.set_decrypt_key(self.key, self.iv)
-
- def decrypt(self, encryptedData):
- cleartext = self.crp.decrypt(encryptedData)
- cleartext = decode(cleartext, charMap2)
- return cleartext
-
-
- # Locate the .kindle-info files
- def getKindleInfoFiles():
- # file searches can take a long time on some systems, so just look in known specific places.
- kInfoFiles=[]
- found = False
- home = os.getenv('HOME')
- # check for .kinf2011 file in new location (App Store Kindle for Mac)
- testpath = home + '/Library/Containers/com.amazon.Kindle/Data/Library/Application Support/Kindle/storage/.kinf2011'
- if os.path.isfile(testpath):
- kInfoFiles.append(testpath)
- print('Found k4Mac kinf2011 file: ' + testpath)
- found = True
- # check for .kinf2011 files from 1.10
- testpath = home + '/Library/Application Support/Kindle/storage/.kinf2011'
- if os.path.isfile(testpath):
- kInfoFiles.append(testpath)
- print('Found k4Mac kinf2011 file: ' + testpath)
- found = True
- # check for .rainier-2.1.1-kinf files from 1.6
- testpath = home + '/Library/Application Support/Kindle/storage/.rainier-2.1.1-kinf'
- if os.path.isfile(testpath):
- kInfoFiles.append(testpath)
- print('Found k4Mac rainier file: ' + testpath)
- found = True
- # check for .kindle-info files from 1.4
- testpath = home + '/Library/Application Support/Kindle/storage/.kindle-info'
- if os.path.isfile(testpath):
- kInfoFiles.append(testpath)
- print('Found k4Mac kindle-info file: ' + testpath)
- found = True
- # check for .kindle-info file from 1.2.2
- testpath = home + '/Library/Application Support/Amazon/Kindle/storage/.kindle-info'
- if os.path.isfile(testpath):
- kInfoFiles.append(testpath)
- print('Found k4Mac kindle-info file: ' + testpath)
- found = True
- # check for .kindle-info file from 1.0 beta 1 (27214)
- testpath = home + '/Library/Application Support/Amazon/Kindle for Mac/storage/.kindle-info'
- if os.path.isfile(testpath):
- kInfoFiles.append(testpath)
- print('Found k4Mac kindle-info file: ' + testpath)
- found = True
- if not found:
- print('No k4Mac kindle-info/rainier/kinf2011 files have been found.')
- return kInfoFiles
-
- # determine type of kindle info provided and return a
- # database of keynames and values
- def getDBfromFile(kInfoFile):
- names = ['kindle.account.tokens','kindle.cookie.item','eulaVersionAccepted','login_date','kindle.token.item','login','kindle.key.item','kindle.name.info','kindle.device.info', 'MazamaRandomNumber', 'max_date', 'SIGVERIF']
- with open(kInfoFile, 'rb') as infoReader:
- filehdr = infoReader.read(1)
- filedata = infoReader.read()
-
- IDStrings = GetIDStrings()
- for IDString in IDStrings:
- DB = {}
- #print "trying IDString:",IDString
- try:
- hdr = filehdr
- data = filedata
- if data.find('[') != -1 :
- # older style kindle-info file
- cud = CryptUnprotectData(IDString)
- items = data.split('[')
- for item in items:
- if item != '':
- keyhash, rawdata = item.split(':')
- keyname = 'unknown'
- for name in names:
- if encodeHash(name,charMap2) == keyhash:
- keyname = name
- break
- if keyname == 'unknown':
- keyname = keyhash
- encryptedValue = decode(rawdata,charMap2)
- cleartext = cud.decrypt(encryptedValue)
- if len(cleartext) > 0:
- DB[keyname] = cleartext
- if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB:
- break
- elif hdr == '/':
- # else newer style .kinf file used by K4Mac >= 1.6.0
- # the .kinf file uses '/' to separate it into records
- # so remove the trailing '/' to make it easy to use split
- data = data[:-1]
- items = data.split('/')
- cud = CryptUnprotectDataV2(IDString)
-
- # loop through the item records until all are processed
- while len(items) > 0:
-
- # get the first item record
- item = items.pop(0)
-
- # the first 32 chars of the first record of a group
- # is the MD5 hash of the key name encoded by charMap5
- keyhash = item[0:32]
- keyname = 'unknown'
-
- # the raw keyhash string is also used to create entropy for the actual
- # CryptProtectData Blob that represents that keys contents
- # 'entropy' not used for K4Mac only K4PC
- # entropy = SHA1(keyhash)
-
- # the remainder of the first record when decoded with charMap5
- # has the ':' split char followed by the string representation
- # of the number of records that follow
- # and make up the contents
- srcnt = decode(item[34:],charMap5)
- rcnt = int(srcnt)
-
- # read and store in rcnt records of data
- # that make up the contents value
- edlst = []
- for i in xrange(rcnt):
- item = items.pop(0)
- edlst.append(item)
-
- keyname = 'unknown'
- for name in names:
- if encodeHash(name,charMap5) == keyhash:
- keyname = name
- break
- if keyname == 'unknown':
- keyname = keyhash
-
- # the charMap5 encoded contents data has had a length
- # of chars (always odd) cut off of the front and moved
- # to the end to prevent decoding using charMap5 from
- # working properly, and thereby preventing the ensuing
- # CryptUnprotectData call from succeeding.
-
- # The offset into the charMap5 encoded contents seems to be:
- # len(contents) - largest prime number less than or equal to int(len(content)/3)
- # (in other words split 'about' 2/3rds of the way through)
-
- # move first offsets chars to end to align for decode by charMap5
- encdata = ''.join(edlst)
- contlen = len(encdata)
-
- # now properly split and recombine
- # by moving noffset chars from the start of the
- # string to the end of the string
- noffset = contlen - primes(int(contlen/3))[-1]
- pfx = encdata[0:noffset]
- encdata = encdata[noffset:]
- encdata = encdata + pfx
-
- # decode using charMap5 to get the CryptProtect Data
- encryptedValue = decode(encdata,charMap5)
- cleartext = cud.decrypt(encryptedValue)
- if len(cleartext) > 0:
- DB[keyname] = cleartext
-
- if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB:
- break
- else:
- # the latest .kinf2011 version for K4M 1.9.1
- # put back the hdr char, it is needed
- data = hdr + data
- data = data[:-1]
- items = data.split('/')
-
- # the headerblob is the encrypted information needed to build the entropy string
- headerblob = items.pop(0)
- encryptedValue = decode(headerblob, charMap1)
- cleartext = UnprotectHeaderData(encryptedValue)
-
- # now extract the pieces in the same way
- # this version is different from K4PC it scales the build number by multipying by 735
- pattern = re.compile(r'''\[Version:(\d+)\]\[Build:(\d+)\]\[Cksum:([^\]]+)\]\[Guid:([\{\}a-z0-9\-]+)\]''', re.IGNORECASE)
- for m in re.finditer(pattern, cleartext):
- entropy = str(int(m.group(2)) * 0x2df) + m.group(4)
-
- cud = CryptUnprotectDataV3(entropy,IDString)
-
- # loop through the item records until all are processed
- while len(items) > 0:
-
- # get the first item record
- item = items.pop(0)
-
- # the first 32 chars of the first record of a group
- # is the MD5 hash of the key name encoded by charMap5
- keyhash = item[0:32]
- keyname = 'unknown'
-
- # unlike K4PC the keyhash is not used in generating entropy
- # entropy = SHA1(keyhash) + added_entropy
- # entropy = added_entropy
-
- # the remainder of the first record when decoded with charMap5
- # has the ':' split char followed by the string representation
- # of the number of records that follow
- # and make up the contents
- srcnt = decode(item[34:],charMap5)
- rcnt = int(srcnt)
-
- # read and store in rcnt records of data
- # that make up the contents value
- edlst = []
- for i in xrange(rcnt):
- item = items.pop(0)
- edlst.append(item)
-
- keyname = 'unknown'
- for name in names:
- if encodeHash(name,testMap8) == keyhash:
- keyname = name
- break
- if keyname == 'unknown':
- keyname = keyhash
-
- # the testMap8 encoded contents data has had a length
- # of chars (always odd) cut off of the front and moved
- # to the end to prevent decoding using testMap8 from
- # working properly, and thereby preventing the ensuing
- # CryptUnprotectData call from succeeding.
-
- # The offset into the testMap8 encoded contents seems to be:
- # len(contents) - largest prime number less than or equal to int(len(content)/3)
- # (in other words split 'about' 2/3rds of the way through)
-
- # move first offsets chars to end to align for decode by testMap8
- encdata = ''.join(edlst)
- contlen = len(encdata)
-
- # now properly split and recombine
- # by moving noffset chars from the start of the
- # string to the end of the string
- noffset = contlen - primes(int(contlen/3))[-1]
- pfx = encdata[0:noffset]
- encdata = encdata[noffset:]
- encdata = encdata + pfx
-
- # decode using testMap8 to get the CryptProtect Data
- encryptedValue = decode(encdata,testMap8)
- cleartext = cud.decrypt(encryptedValue)
- # print keyname
- # print cleartext
- if len(cleartext) > 0:
- DB[keyname] = cleartext
-
- if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB:
- break
- except:
- pass
- if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB:
- # store values used in decryption
- print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(IDString, GetUserName())
- DB['IDString'] = IDString
- DB['UserName'] = GetUserName()
- else:
- print u"Couldn't decrypt file."
- DB = {}
- return DB
-else:
- def getDBfromFile(kInfoFile):
- raise DrmException(u"This script only runs under Windows or Mac OS X.")
- return {}
-
-def kindlekeys(files = []):
- keys = []
- if files == []:
- files = getKindleInfoFiles()
- for file in files:
- key = getDBfromFile(file)
- if key:
- # convert all values to hex, just in case.
- for keyname in key:
- key[keyname]=key[keyname].encode('hex')
- keys.append(key)
- return keys
-
-# interface for Python DeDRM
-# returns single key or multiple keys, depending on path or file passed in
-def getkey(outpath, files=[]):
- keys = kindlekeys(files)
- if len(keys) > 0:
- if not os.path.isdir(outpath):
- outfile = outpath
- with file(outfile, 'w') as keyfileout:
- keyfileout.write(json.dumps(keys[0]))
- print u"Saved a key to {0}".format(outfile)
- else:
- keycount = 0
- for key in keys:
- while True:
- keycount += 1
- outfile = os.path.join(outpath,u"kindlekey{0:d}.k4i".format(keycount))
- if not os.path.exists(outfile):
- break
- with file(outfile, 'w') as keyfileout:
- keyfileout.write(json.dumps(key))
- print u"Saved a key to {0}".format(outfile)
- return True
- return False
-
-def usage(progname):
- print u"Finds, decrypts and saves the default Kindle For Mac/PC encryption keys."
- print u"Keys are saved to the current directory, or a specified output directory."
- print u"If a file name is passed instead of a directory, only the first key is saved, in that file."
- print u"Usage:"
- print u" {0:s} [-h] [-k <kindle.info>] [<outpath>]".format(progname)
-
-
-def cli_main():
- sys.stdout=SafeUnbuffered(sys.stdout)
- sys.stderr=SafeUnbuffered(sys.stderr)
- argv=unicode_argv()
- progname = os.path.basename(argv[0])
- print u"{0} v{1}\nCopyright © 2010-2013 some_updates and Apprentice Alf".format(progname,__version__)
-
- try:
- opts, args = getopt.getopt(argv[1:], "hk:")
- except getopt.GetoptError, err:
- print u"Error in options or arguments: {0}".format(err.args[0])
- usage(progname)
- sys.exit(2)
-
- files = []
- for o, a in opts:
- if o == "-h":
- usage(progname)
- sys.exit(0)
- if o == "-k":
- files = [a]
-
- if len(args) > 1:
- usage(progname)
- sys.exit(2)
-
- if len(args) == 1:
- # save to the specified file or directory
- outpath = args[0]
- if not os.path.isabs(outpath):
- outpath = os.path.abspath(outpath)
- else:
- # save to the same directory as the script
- outpath = os.path.dirname(argv[0])
-
- # make sure the outpath is the
- outpath = os.path.realpath(os.path.normpath(outpath))
-
- if not getkey(outpath, files):
- print u"Could not retrieve Kindle for Mac/PC key."
- return 0
-
-
-def gui_main():
- try:
- import Tkinter
- import Tkconstants
- import tkMessageBox
- import traceback
- except:
- return cli_main()
-
- class ExceptionDialog(Tkinter.Frame):
- def __init__(self, root, text):
- Tkinter.Frame.__init__(self, root, border=5)
- label = Tkinter.Label(self, text=u"Unexpected error:",
- anchor=Tkconstants.W, justify=Tkconstants.LEFT)
- label.pack(fill=Tkconstants.X, expand=0)
- self.text = Tkinter.Text(self)
- self.text.pack(fill=Tkconstants.BOTH, expand=1)
-
- self.text.insert(Tkconstants.END, text)
-
-
- argv=unicode_argv()
- root = Tkinter.Tk()
- root.withdraw()
- progpath, progname = os.path.split(argv[0])
- success = False
- try:
- keys = kindlekeys()
- keycount = 0
- for key in keys:
- while True:
- keycount += 1
- outfile = os.path.join(progpath,u"kindlekey{0:d}.k4i".format(keycount))
- if not os.path.exists(outfile):
- break
-
- with file(outfile, 'w') as keyfileout:
- keyfileout.write(json.dumps(key))
- success = True
- tkMessageBox.showinfo(progname, u"Key successfully retrieved to {0}".format(outfile))
- except DrmException, e:
- tkMessageBox.showerror(progname, u"Error: {0}".format(str(e)))
- except Exception:
- root.wm_state('normal')
- root.title(progname)
- text = traceback.format_exc()
- ExceptionDialog(root, text).pack(fill=Tkconstants.BOTH, expand=1)
- root.mainloop()
- if not success:
- return 1
- return 0
+ for serialnum in serials:
+ try:
+ pidlst.extend(getKindlePids(md1, md2, serialnum))
+ except Exception, e:
+ print u"Error getting PIDs from serial number {0}: {1}".format(serialnum ,e.args[0])
+ traceback.print_exc()
-if __name__ == '__main__':
- if len(sys.argv) > 1:
- sys.exit(cli_main())
- sys.exit(gui_main())
+ return pidlst
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/kindlekey.py b/DeDRM_calibre_plugin/DeDRM_plugin/kindlekey.py
index 8bbcf69..8852769 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/kindlekey.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/kindlekey.py
@@ -1,18 +1,40 @@
-#!/usr/bin/python
+#!/usr/bin/env python
# -*- coding: utf-8 -*-
-# Mobipocket PID calculator v0.4 for Amazon Kindle.
-# Copyright (c) 2007, 2009 Igor Skochinsky <[email protected]>
-# History:
-# 0.1 Initial release
-# 0.2 Added support for generating PID for iPhone (thanks to mbp)
-# 0.3 changed to autoflush stdout, fixed return code usage
-# 0.3 updated for unicode
-# 0.4 Added support for serial numbers starting with '9', fixed unicode bugs.
-# 0.5 moved unicode_argv call inside main for Windows DeDRM compatibility
+from __future__ import with_statement
-import sys
-import binascii
+# kindlekey.py
+# Copyright © 2010-2013 by some_updates and Apprentice Alf
+#
+# Currently requires alfcrypto.py which requires the alfcrypto library
+
+# Revision history:
+# 1.0 - Kindle info file decryption, extracted from k4mobidedrm, etc.
+# 1.1 - Added Tkinter to match adobekey.py
+# 1.2 - Fixed testing of successful retrieval on Mac
+# 1.3 - Added getkey interface for Windows DeDRM application
+# Simplified some of the Kindle for Mac code.
+# 1.4 - Remove dependency on alfcrypto
+# 1.5 - moved unicode_argv call inside main for Windows DeDRM compatibility
+# 1.6 - Fixed a problem getting the disk serial numbers
+# 1.7 - Work if TkInter is missing
+# 1.8 - Fixes for Kindle for Mac, and non-ascii in Windows user names
+# 1.9 - Fixes for Unicode in Windows user names
+
+
+"""
+Retrieve Kindle for PC/Mac user key.
+"""
+
+__license__ = 'GPL v3'
+__version__ = '1.9'
+
+import sys, os, re
+from struct import pack, unpack, unpack_from
+import json
+import getopt
+
+# Routines common to Mac and PC
# Wrap a stream so that output gets flushed immediately
# and also make sure that any unicode strings get
@@ -31,8 +53,11 @@ class SafeUnbuffered:
def __getattr__(self, attr):
return getattr(self.stream, attr)
-iswindows = sys.platform.startswith('win')
-isosx = sys.platform.startswith('darwin')
+try:
+ from calibre.constants import iswindows, isosx
+except:
+ iswindows = sys.platform.startswith('win')
+ isosx = sys.platform.startswith('darwin')
def unicode_argv():
if iswindows:
@@ -41,8 +66,8 @@ def unicode_argv():
# Versions 2.x of Python don't support Unicode in sys.argv on
# Windows, with the underlying Windows API instead replacing multi-byte
- # characters with '?'.
-
+ # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv
+ # as a list of Unicode strings and encode them as utf-8
from ctypes import POINTER, byref, cdll, c_int, windll
from ctypes.wintypes import LPCWSTR, LPWSTR
@@ -65,80 +90,1847 @@ def unicode_argv():
xrange(start, argc.value)]
# if we don't have any arguments at all, just pass back script name
# this should never happen
- return [u"kindlepid.py"]
+ return [u"kindlekey.py"]
else:
argvencoding = sys.stdin.encoding
if argvencoding == None:
argvencoding = "utf-8"
return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
-if sys.hexversion >= 0x3000000:
- print 'This script is incompatible with Python 3.x. Please install Python 2.7.x.'
- sys.exit(2)
+class DrmException(Exception):
+ pass
+
+# crypto digestroutines
+import hashlib
+
+def MD5(message):
+ ctx = hashlib.md5()
+ ctx.update(message)
+ return ctx.digest()
+
+def SHA1(message):
+ ctx = hashlib.sha1()
+ ctx.update(message)
+ return ctx.digest()
+
+def SHA256(message):
+ ctx = hashlib.sha256()
+ ctx.update(message)
+ return ctx.digest()
+
+# For K4M/PC 1.6.X and later
+# generate table of prime number less than or equal to int n
+def primes(n):
+ if n==2: return [2]
+ elif n<2: return []
+ s=range(3,n+1,2)
+ mroot = n ** 0.5
+ half=(n+1)/2-1
+ i=0
+ m=3
+ while m <= mroot:
+ if s[i]:
+ j=(m*m-3)/2
+ s[j]=0
+ while j<half:
+ s[j]=0
+ j+=m
+ i=i+1
+ m=2*i+3
+ return [2]+[x for x in s if x]
+
+# Encode the bytes in data with the characters in map
+def encode(data, map):
+ result = ''
+ for char in data:
+ value = ord(char)
+ Q = (value ^ 0x80) // len(map)
+ R = value % len(map)
+ result += map[Q]
+ result += map[R]
+ return result
+
+# Hash the bytes in data and then encode the digest with the characters in map
+def encodeHash(data,map):
+ return encode(MD5(data),map)
+
+# Decode the string in data with the characters in map. Returns the decoded bytes
+def decode(data,map):
+ result = ''
+ for i in range (0,len(data)-1,2):
+ high = map.find(data[i])
+ low = map.find(data[i+1])
+ if (high == -1) or (low == -1) :
+ break
+ value = (((high * len(map)) ^ 0x80) & 0xFF) + low
+ result += pack('B',value)
+ return result
+
+# Routines unique to Mac and PC
+if iswindows:
+ from ctypes import windll, c_char_p, c_wchar_p, c_uint, POINTER, byref, \
+ create_unicode_buffer, create_string_buffer, CFUNCTYPE, addressof, \
+ string_at, Structure, c_void_p, cast
+
+ import _winreg as winreg
+ MAX_PATH = 255
+ kernel32 = windll.kernel32
+ advapi32 = windll.advapi32
+ crypt32 = windll.crypt32
+
+ try:
+ # try to get fast routines from alfcrypto
+ from alfcrypto import AES_CBC, KeyIVGen
+ except:
+ # alfcrypto not available, so use python implementations
+ """
+ Routines for doing AES CBC in one file
+
+ Modified by some_updates to extract
+ and combine only those parts needed for AES CBC
+ into one simple to add python file
+
+ Original Version
+ Copyright (c) 2002 by Paul A. Lambert
+ Under:
+ CryptoPy Artisitic License Version 1.0
+ See the wonderful pure python package cryptopy-1.2.5
+ and read its LICENSE.txt for complete license details.
+ """
+
+ class CryptoError(Exception):
+ """ Base class for crypto exceptions """
+ def __init__(self,errorMessage='Error!'):
+ self.message = errorMessage
+ def __str__(self):
+ return self.message
+
+ class InitCryptoError(CryptoError):
+ """ Crypto errors during algorithm initialization """
+ class BadKeySizeError(InitCryptoError):
+ """ Bad key size error """
+ class EncryptError(CryptoError):
+ """ Error in encryption processing """
+ class DecryptError(CryptoError):
+ """ Error in decryption processing """
+ class DecryptNotBlockAlignedError(DecryptError):
+ """ Error in decryption processing """
+
+ def xorS(a,b):
+ """ XOR two strings """
+ assert len(a)==len(b)
+ x = []
+ for i in range(len(a)):
+ x.append( chr(ord(a[i])^ord(b[i])))
+ return ''.join(x)
+
+ def xor(a,b):
+ """ XOR two strings """
+ x = []
+ for i in range(min(len(a),len(b))):
+ x.append( chr(ord(a[i])^ord(b[i])))
+ return ''.join(x)
+
+ """
+ Base 'BlockCipher' and Pad classes for cipher instances.
+ BlockCipher supports automatic padding and type conversion. The BlockCipher
+ class was written to make the actual algorithm code more readable and
+ not for performance.
+ """
+
+ class BlockCipher:
+ """ Block ciphers """
+ def __init__(self):
+ self.reset()
+
+ def reset(self):
+ self.resetEncrypt()
+ self.resetDecrypt()
+ def resetEncrypt(self):
+ self.encryptBlockCount = 0
+ self.bytesToEncrypt = ''
+ def resetDecrypt(self):
+ self.decryptBlockCount = 0
+ self.bytesToDecrypt = ''
+
+ def encrypt(self, plainText, more = None):
+ """ Encrypt a string and return a binary string """
+ self.bytesToEncrypt += plainText # append plainText to any bytes from prior encrypt
+ numBlocks, numExtraBytes = divmod(len(self.bytesToEncrypt), self.blockSize)
+ cipherText = ''
+ for i in range(numBlocks):
+ bStart = i*self.blockSize
+ ctBlock = self.encryptBlock(self.bytesToEncrypt[bStart:bStart+self.blockSize])
+ self.encryptBlockCount += 1
+ cipherText += ctBlock
+ if numExtraBytes > 0: # save any bytes that are not block aligned
+ self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:]
+ else:
+ self.bytesToEncrypt = ''
+
+ if more == None: # no more data expected from caller
+ finalBytes = self.padding.addPad(self.bytesToEncrypt,self.blockSize)
+ if len(finalBytes) > 0:
+ ctBlock = self.encryptBlock(finalBytes)
+ self.encryptBlockCount += 1
+ cipherText += ctBlock
+ self.resetEncrypt()
+ return cipherText
+
+ def decrypt(self, cipherText, more = None):
+ """ Decrypt a string and return a string """
+ self.bytesToDecrypt += cipherText # append to any bytes from prior decrypt
+
+ numBlocks, numExtraBytes = divmod(len(self.bytesToDecrypt), self.blockSize)
+ if more == None: # no more calls to decrypt, should have all the data
+ if numExtraBytes != 0:
+ raise DecryptNotBlockAlignedError, 'Data not block aligned on decrypt'
+
+ # hold back some bytes in case last decrypt has zero len
+ if (more != None) and (numExtraBytes == 0) and (numBlocks >0) :
+ numBlocks -= 1
+ numExtraBytes = self.blockSize
+
+ plainText = ''
+ for i in range(numBlocks):
+ bStart = i*self.blockSize
+ ptBlock = self.decryptBlock(self.bytesToDecrypt[bStart : bStart+self.blockSize])
+ self.decryptBlockCount += 1
+ plainText += ptBlock
+
+ if numExtraBytes > 0: # save any bytes that are not block aligned
+ self.bytesToEncrypt = self.bytesToEncrypt[-numExtraBytes:]
+ else:
+ self.bytesToEncrypt = ''
+
+ if more == None: # last decrypt remove padding
+ plainText = self.padding.removePad(plainText, self.blockSize)
+ self.resetDecrypt()
+ return plainText
+
+
+ class Pad:
+ def __init__(self):
+ pass # eventually could put in calculation of min and max size extension
+
+ class padWithPadLen(Pad):
+ """ Pad a binary string with the length of the padding """
+
+ def addPad(self, extraBytes, blockSize):
+ """ Add padding to a binary string to make it an even multiple
+ of the block size """
+ blocks, numExtraBytes = divmod(len(extraBytes), blockSize)
+ padLength = blockSize - numExtraBytes
+ return extraBytes + padLength*chr(padLength)
+
+ def removePad(self, paddedBinaryString, blockSize):
+ """ Remove padding from a binary string """
+ if not(0<len(paddedBinaryString)):
+ raise DecryptNotBlockAlignedError, 'Expected More Data'
+ return paddedBinaryString[:-ord(paddedBinaryString[-1])]
+
+ class noPadding(Pad):
+ """ No padding. Use this to get ECB behavior from encrypt/decrypt """
+
+ def addPad(self, extraBytes, blockSize):
+ """ Add no padding """
+ return extraBytes
+
+ def removePad(self, paddedBinaryString, blockSize):
+ """ Remove no padding """
+ return paddedBinaryString
+
+ """
+ Rijndael encryption algorithm
+ This byte oriented implementation is intended to closely
+ match FIPS specification for readability. It is not implemented
+ for performance.
+ """
+
+ class Rijndael(BlockCipher):
+ """ Rijndael encryption algorithm """
+ def __init__(self, key = None, padding = padWithPadLen(), keySize=16, blockSize=16 ):
+ self.name = 'RIJNDAEL'
+ self.keySize = keySize
+ self.strength = keySize*8
+ self.blockSize = blockSize # blockSize is in bytes
+ self.padding = padding # change default to noPadding() to get normal ECB behavior
+
+ assert( keySize%4==0 and NrTable[4].has_key(keySize/4)),'key size must be 16,20,24,29 or 32 bytes'
+ assert( blockSize%4==0 and NrTable.has_key(blockSize/4)), 'block size must be 16,20,24,29 or 32 bytes'
+
+ self.Nb = self.blockSize/4 # Nb is number of columns of 32 bit words
+ self.Nk = keySize/4 # Nk is the key length in 32-bit words
+ self.Nr = NrTable[self.Nb][self.Nk] # The number of rounds (Nr) is a function of
+ # the block (Nb) and key (Nk) sizes.
+ if key != None:
+ self.setKey(key)
+
+ def setKey(self, key):
+ """ Set a key and generate the expanded key """
+ assert( len(key) == (self.Nk*4) ), 'Key length must be same as keySize parameter'
+ self.__expandedKey = keyExpansion(self, key)
+ self.reset() # BlockCipher.reset()
+
+ def encryptBlock(self, plainTextBlock):
+ """ Encrypt a block, plainTextBlock must be a array of bytes [Nb by 4] """
+ self.state = self._toBlock(plainTextBlock)
+ AddRoundKey(self, self.__expandedKey[0:self.Nb])
+ for round in range(1,self.Nr): #for round = 1 step 1 to Nr
+ SubBytes(self)
+ ShiftRows(self)
+ MixColumns(self)
+ AddRoundKey(self, self.__expandedKey[round*self.Nb:(round+1)*self.Nb])
+ SubBytes(self)
+ ShiftRows(self)
+ AddRoundKey(self, self.__expandedKey[self.Nr*self.Nb:(self.Nr+1)*self.Nb])
+ return self._toBString(self.state)
+
+
+ def decryptBlock(self, encryptedBlock):
+ """ decrypt a block (array of bytes) """
+ self.state = self._toBlock(encryptedBlock)
+ AddRoundKey(self, self.__expandedKey[self.Nr*self.Nb:(self.Nr+1)*self.Nb])
+ for round in range(self.Nr-1,0,-1):
+ InvShiftRows(self)
+ InvSubBytes(self)
+ AddRoundKey(self, self.__expandedKey[round*self.Nb:(round+1)*self.Nb])
+ InvMixColumns(self)
+ InvShiftRows(self)
+ InvSubBytes(self)
+ AddRoundKey(self, self.__expandedKey[0:self.Nb])
+ return self._toBString(self.state)
+
+ def _toBlock(self, bs):
+ """ Convert binary string to array of bytes, state[col][row]"""
+ assert ( len(bs) == 4*self.Nb ), 'Rijndarl blocks must be of size blockSize'
+ return [[ord(bs[4*i]),ord(bs[4*i+1]),ord(bs[4*i+2]),ord(bs[4*i+3])] for i in range(self.Nb)]
+
+ def _toBString(self, block):
+ """ Convert block (array of bytes) to binary string """
+ l = []
+ for col in block:
+ for rowElement in col:
+ l.append(chr(rowElement))
+ return ''.join(l)
+ #-------------------------------------
+ """ Number of rounds Nr = NrTable[Nb][Nk]
+
+ Nb Nk=4 Nk=5 Nk=6 Nk=7 Nk=8
+ ------------------------------------- """
+ NrTable = {4: {4:10, 5:11, 6:12, 7:13, 8:14},
+ 5: {4:11, 5:11, 6:12, 7:13, 8:14},
+ 6: {4:12, 5:12, 6:12, 7:13, 8:14},
+ 7: {4:13, 5:13, 6:13, 7:13, 8:14},
+ 8: {4:14, 5:14, 6:14, 7:14, 8:14}}
+ #-------------------------------------
+ def keyExpansion(algInstance, keyString):
+ """ Expand a string of size keySize into a larger array """
+ Nk, Nb, Nr = algInstance.Nk, algInstance.Nb, algInstance.Nr # for readability
+ key = [ord(byte) for byte in keyString] # convert string to list
+ w = [[key[4*i],key[4*i+1],key[4*i+2],key[4*i+3]] for i in range(Nk)]
+ for i in range(Nk,Nb*(Nr+1)):
+ temp = w[i-1] # a four byte column
+ if (i%Nk) == 0 :
+ temp = temp[1:]+[temp[0]] # RotWord(temp)
+ temp = [ Sbox[byte] for byte in temp ]
+ temp[0] ^= Rcon[i/Nk]
+ elif Nk > 6 and i%Nk == 4 :
+ temp = [ Sbox[byte] for byte in temp ] # SubWord(temp)
+ w.append( [ w[i-Nk][byte]^temp[byte] for byte in range(4) ] )
+ return w
+
+ Rcon = (0,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36, # note extra '0' !!!
+ 0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,
+ 0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91)
+
+ #-------------------------------------
+ def AddRoundKey(algInstance, keyBlock):
+ """ XOR the algorithm state with a block of key material """
+ for column in range(algInstance.Nb):
+ for row in range(4):
+ algInstance.state[column][row] ^= keyBlock[column][row]
+ #-------------------------------------
+
+ def SubBytes(algInstance):
+ for column in range(algInstance.Nb):
+ for row in range(4):
+ algInstance.state[column][row] = Sbox[algInstance.state[column][row]]
+
+ def InvSubBytes(algInstance):
+ for column in range(algInstance.Nb):
+ for row in range(4):
+ algInstance.state[column][row] = InvSbox[algInstance.state[column][row]]
+
+ Sbox = (0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,
+ 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
+ 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,
+ 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
+ 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,
+ 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15,
+ 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,
+ 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75,
+ 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,
+ 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84,
+ 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,
+ 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf,
+ 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,
+ 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8,
+ 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,
+ 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2,
+ 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,
+ 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73,
+ 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,
+ 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb,
+ 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,
+ 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79,
+ 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,
+ 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08,
+ 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,
+ 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a,
+ 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,
+ 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e,
+ 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,
+ 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf,
+ 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,
+ 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16)
+
+ InvSbox = (0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
+ 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
+ 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
+ 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
+ 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
+ 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
+ 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
+ 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
+ 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
+ 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
+ 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
+ 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
+ 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
+ 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
+ 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
+ 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
+ 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
+ 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
+ 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
+ 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
+ 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
+ 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
+ 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
+ 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
+ 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
+ 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
+ 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
+ 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
+ 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
+ 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
+ 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
+ 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d)
+
+ #-------------------------------------
+ """ For each block size (Nb), the ShiftRow operation shifts row i
+ by the amount Ci. Note that row 0 is not shifted.
+ Nb C1 C2 C3
+ ------------------- """
+ shiftOffset = { 4 : ( 0, 1, 2, 3),
+ 5 : ( 0, 1, 2, 3),
+ 6 : ( 0, 1, 2, 3),
+ 7 : ( 0, 1, 2, 4),
+ 8 : ( 0, 1, 3, 4) }
+ def ShiftRows(algInstance):
+ tmp = [0]*algInstance.Nb # list of size Nb
+ for r in range(1,4): # row 0 reamains unchanged and can be skipped
+ for c in range(algInstance.Nb):
+ tmp[c] = algInstance.state[(c+shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r]
+ for c in range(algInstance.Nb):
+ algInstance.state[c][r] = tmp[c]
+ def InvShiftRows(algInstance):
+ tmp = [0]*algInstance.Nb # list of size Nb
+ for r in range(1,4): # row 0 reamains unchanged and can be skipped
+ for c in range(algInstance.Nb):
+ tmp[c] = algInstance.state[(c+algInstance.Nb-shiftOffset[algInstance.Nb][r]) % algInstance.Nb][r]
+ for c in range(algInstance.Nb):
+ algInstance.state[c][r] = tmp[c]
+ #-------------------------------------
+ def MixColumns(a):
+ Sprime = [0,0,0,0]
+ for j in range(a.Nb): # for each column
+ Sprime[0] = mul(2,a.state[j][0])^mul(3,a.state[j][1])^mul(1,a.state[j][2])^mul(1,a.state[j][3])
+ Sprime[1] = mul(1,a.state[j][0])^mul(2,a.state[j][1])^mul(3,a.state[j][2])^mul(1,a.state[j][3])
+ Sprime[2] = mul(1,a.state[j][0])^mul(1,a.state[j][1])^mul(2,a.state[j][2])^mul(3,a.state[j][3])
+ Sprime[3] = mul(3,a.state[j][0])^mul(1,a.state[j][1])^mul(1,a.state[j][2])^mul(2,a.state[j][3])
+ for i in range(4):
+ a.state[j][i] = Sprime[i]
+
+ def InvMixColumns(a):
+ """ Mix the four bytes of every column in a linear way
+ This is the opposite operation of Mixcolumn """
+ Sprime = [0,0,0,0]
+ for j in range(a.Nb): # for each column
+ Sprime[0] = mul(0x0E,a.state[j][0])^mul(0x0B,a.state[j][1])^mul(0x0D,a.state[j][2])^mul(0x09,a.state[j][3])
+ Sprime[1] = mul(0x09,a.state[j][0])^mul(0x0E,a.state[j][1])^mul(0x0B,a.state[j][2])^mul(0x0D,a.state[j][3])
+ Sprime[2] = mul(0x0D,a.state[j][0])^mul(0x09,a.state[j][1])^mul(0x0E,a.state[j][2])^mul(0x0B,a.state[j][3])
+ Sprime[3] = mul(0x0B,a.state[j][0])^mul(0x0D,a.state[j][1])^mul(0x09,a.state[j][2])^mul(0x0E,a.state[j][3])
+ for i in range(4):
+ a.state[j][i] = Sprime[i]
+
+ #-------------------------------------
+ def mul(a, b):
+ """ Multiply two elements of GF(2^m)
+ needed for MixColumn and InvMixColumn """
+ if (a !=0 and b!=0):
+ return Alogtable[(Logtable[a] + Logtable[b])%255]
+ else:
+ return 0
+
+ Logtable = ( 0, 0, 25, 1, 50, 2, 26, 198, 75, 199, 27, 104, 51, 238, 223, 3,
+ 100, 4, 224, 14, 52, 141, 129, 239, 76, 113, 8, 200, 248, 105, 28, 193,
+ 125, 194, 29, 181, 249, 185, 39, 106, 77, 228, 166, 114, 154, 201, 9, 120,
+ 101, 47, 138, 5, 33, 15, 225, 36, 18, 240, 130, 69, 53, 147, 218, 142,
+ 150, 143, 219, 189, 54, 208, 206, 148, 19, 92, 210, 241, 64, 70, 131, 56,
+ 102, 221, 253, 48, 191, 6, 139, 98, 179, 37, 226, 152, 34, 136, 145, 16,
+ 126, 110, 72, 195, 163, 182, 30, 66, 58, 107, 40, 84, 250, 133, 61, 186,
+ 43, 121, 10, 21, 155, 159, 94, 202, 78, 212, 172, 229, 243, 115, 167, 87,
+ 175, 88, 168, 80, 244, 234, 214, 116, 79, 174, 233, 213, 231, 230, 173, 232,
+ 44, 215, 117, 122, 235, 22, 11, 245, 89, 203, 95, 176, 156, 169, 81, 160,
+ 127, 12, 246, 111, 23, 196, 73, 236, 216, 67, 31, 45, 164, 118, 123, 183,
+ 204, 187, 62, 90, 251, 96, 177, 134, 59, 82, 161, 108, 170, 85, 41, 157,
+ 151, 178, 135, 144, 97, 190, 220, 252, 188, 149, 207, 205, 55, 63, 91, 209,
+ 83, 57, 132, 60, 65, 162, 109, 71, 20, 42, 158, 93, 86, 242, 211, 171,
+ 68, 17, 146, 217, 35, 32, 46, 137, 180, 124, 184, 38, 119, 153, 227, 165,
+ 103, 74, 237, 222, 197, 49, 254, 24, 13, 99, 140, 128, 192, 247, 112, 7)
+
+ Alogtable= ( 1, 3, 5, 15, 17, 51, 85, 255, 26, 46, 114, 150, 161, 248, 19, 53,
+ 95, 225, 56, 72, 216, 115, 149, 164, 247, 2, 6, 10, 30, 34, 102, 170,
+ 229, 52, 92, 228, 55, 89, 235, 38, 106, 190, 217, 112, 144, 171, 230, 49,
+ 83, 245, 4, 12, 20, 60, 68, 204, 79, 209, 104, 184, 211, 110, 178, 205,
+ 76, 212, 103, 169, 224, 59, 77, 215, 98, 166, 241, 8, 24, 40, 120, 136,
+ 131, 158, 185, 208, 107, 189, 220, 127, 129, 152, 179, 206, 73, 219, 118, 154,
+ 181, 196, 87, 249, 16, 48, 80, 240, 11, 29, 39, 105, 187, 214, 97, 163,
+ 254, 25, 43, 125, 135, 146, 173, 236, 47, 113, 147, 174, 233, 32, 96, 160,
+ 251, 22, 58, 78, 210, 109, 183, 194, 93, 231, 50, 86, 250, 21, 63, 65,
+ 195, 94, 226, 61, 71, 201, 64, 192, 91, 237, 44, 116, 156, 191, 218, 117,
+ 159, 186, 213, 100, 172, 239, 42, 126, 130, 157, 188, 223, 122, 142, 137, 128,
+ 155, 182, 193, 88, 232, 35, 101, 175, 234, 37, 111, 177, 200, 67, 197, 84,
+ 252, 31, 33, 99, 165, 244, 7, 9, 27, 45, 119, 153, 176, 203, 70, 202,
+ 69, 207, 74, 222, 121, 139, 134, 145, 168, 227, 62, 66, 198, 81, 243, 14,
+ 18, 54, 90, 238, 41, 123, 141, 140, 143, 138, 133, 148, 167, 242, 13, 23,
+ 57, 75, 221, 124, 132, 151, 162, 253, 28, 36, 108, 180, 199, 82, 246, 1)
+
+
+
+
+ """
+ AES Encryption Algorithm
+ The AES algorithm is just Rijndael algorithm restricted to the default
+ blockSize of 128 bits.
+ """
+
+ class AES(Rijndael):
+ """ The AES algorithm is the Rijndael block cipher restricted to block
+ sizes of 128 bits and key sizes of 128, 192 or 256 bits
+ """
+ def __init__(self, key = None, padding = padWithPadLen(), keySize=16):
+ """ Initialize AES, keySize is in bytes """
+ if not (keySize == 16 or keySize == 24 or keySize == 32) :
+ raise BadKeySizeError, 'Illegal AES key size, must be 16, 24, or 32 bytes'
+
+ Rijndael.__init__( self, key, padding=padding, keySize=keySize, blockSize=16 )
+
+ self.name = 'AES'
+
+
+ """
+ CBC mode of encryption for block ciphers.
+ This algorithm mode wraps any BlockCipher to make a
+ Cipher Block Chaining mode.
+ """
+ from random import Random # should change to crypto.random!!!
+
+
+ class CBC(BlockCipher):
+ """ The CBC class wraps block ciphers to make cipher block chaining (CBC) mode
+ algorithms. The initialization (IV) is automatic if set to None. Padding
+ is also automatic based on the Pad class used to initialize the algorithm
+ """
+ def __init__(self, blockCipherInstance, padding = padWithPadLen()):
+ """ CBC algorithms are created by initializing with a BlockCipher instance """
+ self.baseCipher = blockCipherInstance
+ self.name = self.baseCipher.name + '_CBC'
+ self.blockSize = self.baseCipher.blockSize
+ self.keySize = self.baseCipher.keySize
+ self.padding = padding
+ self.baseCipher.padding = noPadding() # baseCipher should NOT pad!!
+ self.r = Random() # for IV generation, currently uses
+ # mediocre standard distro version <----------------
+ import time
+ newSeed = time.ctime()+str(self.r) # seed with instance location
+ self.r.seed(newSeed) # to make unique
+ self.reset()
+
+ def setKey(self, key):
+ self.baseCipher.setKey(key)
+
+ # Overload to reset both CBC state and the wrapped baseCipher
+ def resetEncrypt(self):
+ BlockCipher.resetEncrypt(self) # reset CBC encrypt state (super class)
+ self.baseCipher.resetEncrypt() # reset base cipher encrypt state
+
+ def resetDecrypt(self):
+ BlockCipher.resetDecrypt(self) # reset CBC state (super class)
+ self.baseCipher.resetDecrypt() # reset base cipher decrypt state
+
+ def encrypt(self, plainText, iv=None, more=None):
+ """ CBC encryption - overloads baseCipher to allow optional explicit IV
+ when iv=None, iv is auto generated!
+ """
+ if self.encryptBlockCount == 0:
+ self.iv = iv
+ else:
+ assert(iv==None), 'IV used only on first call to encrypt'
+
+ return BlockCipher.encrypt(self,plainText, more=more)
+
+ def decrypt(self, cipherText, iv=None, more=None):
+ """ CBC decryption - overloads baseCipher to allow optional explicit IV
+ when iv=None, iv is auto generated!
+ """
+ if self.decryptBlockCount == 0:
+ self.iv = iv
+ else:
+ assert(iv==None), 'IV used only on first call to decrypt'
+
+ return BlockCipher.decrypt(self, cipherText, more=more)
+
+ def encryptBlock(self, plainTextBlock):
+ """ CBC block encryption, IV is set with 'encrypt' """
+ auto_IV = ''
+ if self.encryptBlockCount == 0:
+ if self.iv == None:
+ # generate IV and use
+ self.iv = ''.join([chr(self.r.randrange(256)) for i in range(self.blockSize)])
+ self.prior_encr_CT_block = self.iv
+ auto_IV = self.prior_encr_CT_block # prepend IV if it's automatic
+ else: # application provided IV
+ assert(len(self.iv) == self.blockSize ),'IV must be same length as block'
+ self.prior_encr_CT_block = self.iv
+ """ encrypt the prior CT XORed with the PT """
+ ct = self.baseCipher.encryptBlock( xor(self.prior_encr_CT_block, plainTextBlock) )
+ self.prior_encr_CT_block = ct
+ return auto_IV+ct
+
+ def decryptBlock(self, encryptedBlock):
+ """ Decrypt a single block """
+
+ if self.decryptBlockCount == 0: # first call, process IV
+ if self.iv == None: # auto decrypt IV?
+ self.prior_CT_block = encryptedBlock
+ return ''
+ else:
+ assert(len(self.iv)==self.blockSize),"Bad IV size on CBC decryption"
+ self.prior_CT_block = self.iv
+
+ dct = self.baseCipher.decryptBlock(encryptedBlock)
+ """ XOR the prior decrypted CT with the prior CT """
+ dct_XOR_priorCT = xor( self.prior_CT_block, dct )
+
+ self.prior_CT_block = encryptedBlock
+
+ return dct_XOR_priorCT
+
+
+ """
+ AES_CBC Encryption Algorithm
+ """
+
+ class aescbc_AES_CBC(CBC):
+ """ AES encryption in CBC feedback mode """
+ def __init__(self, key=None, padding=padWithPadLen(), keySize=16):
+ CBC.__init__( self, AES(key, noPadding(), keySize), padding)
+ self.name = 'AES_CBC'
+
+ class AES_CBC(object):
+ def __init__(self):
+ self._key = None
+ self._iv = None
+ self.aes = None
+
+ def set_decrypt_key(self, userkey, iv):
+ self._key = userkey
+ self._iv = iv
+ self.aes = aescbc_AES_CBC(userkey, noPadding(), len(userkey))
+
+ def decrypt(self, data):
+ iv = self._iv
+ cleartext = self.aes.decrypt(iv + data)
+ return cleartext
+
+ import hmac
+
+ class KeyIVGen(object):
+ # this only exists in openssl so we will use pure python implementation instead
+ # PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1',
+ # [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p])
+ def pbkdf2(self, passwd, salt, iter, keylen):
+
+ def xorstr( a, b ):
+ if len(a) != len(b):
+ raise Exception("xorstr(): lengths differ")
+ return ''.join((chr(ord(x)^ord(y)) for x, y in zip(a, b)))
+
+ def prf( h, data ):
+ hm = h.copy()
+ hm.update( data )
+ return hm.digest()
+
+ def pbkdf2_F( h, salt, itercount, blocknum ):
+ U = prf( h, salt + pack('>i',blocknum ) )
+ T = U
+ for i in range(2, itercount+1):
+ U = prf( h, U )
+ T = xorstr( T, U )
+ return T
+
+ sha = hashlib.sha1
+ digest_size = sha().digest_size
+ # l - number of output blocks to produce
+ l = keylen / digest_size
+ if keylen % digest_size != 0:
+ l += 1
+ h = hmac.new( passwd, None, sha )
+ T = ""
+ for i in range(1, l+1):
+ T += pbkdf2_F( h, salt, iter, i )
+ return T[0: keylen]
+
+ def UnprotectHeaderData(encryptedData):
+ passwdData = 'header_key_data'
+ salt = 'HEADER.2011'
+ iter = 0x80
+ keylen = 0x100
+ key_iv = KeyIVGen().pbkdf2(passwdData, salt, iter, keylen)
+ key = key_iv[0:32]
+ iv = key_iv[32:48]
+ aes=AES_CBC()
+ aes.set_decrypt_key(key, iv)
+ cleartext = aes.decrypt(encryptedData)
+ return cleartext
+
+ # Various character maps used to decrypt kindle info values.
+ # Probably supposed to act as obfuscation
+ charMap2 = "AaZzB0bYyCc1XxDdW2wEeVv3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_"
+ charMap5 = "AzB0bYyCeVvaZ3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_c1XxDdW2wE"
+ # New maps in K4PC 1.9.0
+ testMap1 = "n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M"
+ testMap6 = "9YzAb0Cd1Ef2n5Pr6St7Uvh3Jk4M8WxG"
+ testMap8 = "YvaZ3FfUm9Nn_c1XuG4yCAzB0beVg-TtHh5SsIiR6rJjQdW2wEq7KkPpL8lOoMxD"
+
+ # interface with Windows OS Routines
+ class DataBlob(Structure):
+ _fields_ = [('cbData', c_uint),
+ ('pbData', c_void_p)]
+ DataBlob_p = POINTER(DataBlob)
+
+
+ def GetSystemDirectory():
+ GetSystemDirectoryW = kernel32.GetSystemDirectoryW
+ GetSystemDirectoryW.argtypes = [c_wchar_p, c_uint]
+ GetSystemDirectoryW.restype = c_uint
+ def GetSystemDirectory():
+ buffer = create_unicode_buffer(MAX_PATH + 1)
+ GetSystemDirectoryW(buffer, len(buffer))
+ return buffer.value
+ return GetSystemDirectory
+ GetSystemDirectory = GetSystemDirectory()
+
+ def GetVolumeSerialNumber():
+ GetVolumeInformationW = kernel32.GetVolumeInformationW
+ GetVolumeInformationW.argtypes = [c_wchar_p, c_wchar_p, c_uint,
+ POINTER(c_uint), POINTER(c_uint),
+ POINTER(c_uint), c_wchar_p, c_uint]
+ GetVolumeInformationW.restype = c_uint
+ def GetVolumeSerialNumber(path = GetSystemDirectory().split('\\')[0] + '\\'):
+ vsn = c_uint(0)
+ GetVolumeInformationW(path, None, 0, byref(vsn), None, None, None, 0)
+ return str(vsn.value)
+ return GetVolumeSerialNumber
+ GetVolumeSerialNumber = GetVolumeSerialNumber()
+
+ def GetIDString():
+ vsn = GetVolumeSerialNumber()
+ #print('Using Volume Serial Number for ID: '+vsn)
+ return vsn
+
+ def getLastError():
+ GetLastError = kernel32.GetLastError
+ GetLastError.argtypes = None
+ GetLastError.restype = c_uint
+ def getLastError():
+ return GetLastError()
+ return getLastError
+ getLastError = getLastError()
+
+ def GetUserName():
+ GetUserNameW = advapi32.GetUserNameW
+ GetUserNameW.argtypes = [c_wchar_p, POINTER(c_uint)]
+ GetUserNameW.restype = c_uint
+ def GetUserName():
+ buffer = create_unicode_buffer(2)
+ size = c_uint(len(buffer))
+ while not GetUserNameW(buffer, byref(size)):
+ errcd = getLastError()
+ if errcd == 234:
+ # bad wine implementation up through wine 1.3.21
+ return "AlternateUserName"
+ buffer = create_unicode_buffer(len(buffer) * 2)
+ size.value = len(buffer)
+ return buffer.value.encode('utf-16-le')[::2]
+ return GetUserName
+ GetUserName = GetUserName()
+
+ def CryptUnprotectData():
+ _CryptUnprotectData = crypt32.CryptUnprotectData
+ _CryptUnprotectData.argtypes = [DataBlob_p, c_wchar_p, DataBlob_p,
+ c_void_p, c_void_p, c_uint, DataBlob_p]
+ _CryptUnprotectData.restype = c_uint
+ def CryptUnprotectData(indata, entropy, flags):
+ indatab = create_string_buffer(indata)
+ indata = DataBlob(len(indata), cast(indatab, c_void_p))
+ entropyb = create_string_buffer(entropy)
+ entropy = DataBlob(len(entropy), cast(entropyb, c_void_p))
+ outdata = DataBlob()
+ if not _CryptUnprotectData(byref(indata), None, byref(entropy),
+ None, None, flags, byref(outdata)):
+ # raise DrmException("Failed to Unprotect Data")
+ return 'failed'
+ return string_at(outdata.pbData, outdata.cbData)
+ return CryptUnprotectData
+ CryptUnprotectData = CryptUnprotectData()
+
+ # Returns Environmental Variables that contain unicode
+ def getEnvironmentVariable(name):
+ import ctypes
+ name = unicode(name) # make sure string argument is unicode
+ n = ctypes.windll.kernel32.GetEnvironmentVariableW(name, None, 0)
+ if n == 0:
+ return None
+ buf = ctypes.create_unicode_buffer(u'\0'*n)
+ ctypes.windll.kernel32.GetEnvironmentVariableW(name, buf, n)
+ return buf.value
+
+ # Locate all of the kindle-info style files and return as list
+ def getKindleInfoFiles():
+ kInfoFiles = []
+ # some 64 bit machines do not have the proper registry key for some reason
+ # or the python interface to the 32 vs 64 bit registry is broken
+ path = ""
+ if 'LOCALAPPDATA' in os.environ.keys():
+ # Python 2.x does not return unicode env. Use Python 3.x
+ path = winreg.ExpandEnvironmentStrings(u"%LOCALAPPDATA%")
+ # this is just another alternative.
+ # path = getEnvironmentVariable('LOCALAPPDATA')
+ if not os.path.isdir(path):
+ path = ""
+ else:
+ # User Shell Folders should take precedence over Shell Folders if present
+ try:
+ # this will still break
+ regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\User Shell Folders\\")
+ path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
+ if not os.path.isdir(path):
+ path = ""
+ try:
+ regkey = winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\Shell Folders\\")
+ path = winreg.QueryValueEx(regkey, 'Local AppData')[0]
+ if not os.path.isdir(path):
+ path = ""
+ except RegError:
+ pass
+ except RegError:
+ pass
+
+ found = False
+ if path == "":
+ print ('Could not find the folder in which to look for kinfoFiles.')
+ else:
+ # Probably not the best. To fix: don't ignore errors when encoding, or use utf-8
+ print(u'searching for kinfoFiles in ' + path.encode('ascii', 'ignore'))
+
+ # look for (K4PC 1.9.0 and later) .kinf2011 file
+ kinfopath = path +'\\Amazon\\Kindle\\storage\\.kinf2011'
+ if os.path.isfile(kinfopath):
+ found = True
+ print('Found K4PC 1.9+ kinf2011 file: ' + kinfopath.encode('ascii','ignore'))
+ kInfoFiles.append(kinfopath)
+
+ # look for (K4PC 1.6.0 and later) rainier.2.1.1.kinf file
+ kinfopath = path +'\\Amazon\\Kindle\\storage\\rainier.2.1.1.kinf'
+ if os.path.isfile(kinfopath):
+ found = True
+ print('Found K4PC 1.6-1.8 kinf file: ' + kinfopath)
+ kInfoFiles.append(kinfopath)
+
+ # look for (K4PC 1.5.0 and later) rainier.2.1.1.kinf file
+ kinfopath = path +'\\Amazon\\Kindle For PC\\storage\\rainier.2.1.1.kinf'
+ if os.path.isfile(kinfopath):
+ found = True
+ print('Found K4PC 1.5 kinf file: ' + kinfopath)
+ kInfoFiles.append(kinfopath)
-letters = 'ABCDEFGHIJKLMNPQRSTUVWXYZ123456789'
+ # look for original (earlier than K4PC 1.5.0) kindle-info files
+ kinfopath = path +'\\Amazon\\Kindle For PC\\{AMAwzsaPaaZAzmZzZQzgZCAkZ3AjA_AY}\\kindle.info'
+ if os.path.isfile(kinfopath):
+ found = True
+ print('Found K4PC kindle.info file: ' + kinfopath)
+ kInfoFiles.append(kinfopath)
-def crc32(s):
- return (~binascii.crc32(s,-1))&0xFFFFFFFF
+ if not found:
+ print('No K4PC kindle.info/kinf/kinf2011 files have been found.')
+ return kInfoFiles
-def checksumPid(s):
- crc = crc32(s)
- crc = crc ^ (crc >> 16)
- res = s
- l = len(letters)
- for i in (0,1):
- b = crc & 0xff
- pos = (b // l) ^ (b % l)
- res += letters[pos%l]
- crc >>= 8
- return res
+ # determine type of kindle info provided and return a
+ # database of keynames and values
+ def getDBfromFile(kInfoFile):
+ names = ['kindle.account.tokens','kindle.cookie.item','eulaVersionAccepted','login_date','kindle.token.item','login','kindle.key.item','kindle.name.info','kindle.device.info', 'MazamaRandomNumber', 'max_date', 'SIGVERIF']
+ DB = {}
+ with open(kInfoFile, 'rb') as infoReader:
+ hdr = infoReader.read(1)
+ data = infoReader.read()
-def pidFromSerial(s, l):
- crc = crc32(s)
+ if data.find('{') != -1 :
+ # older style kindle-info file
+ items = data.split('{')
+ for item in items:
+ if item != '':
+ keyhash, rawdata = item.split(':')
+ keyname = "unknown"
+ for name in names:
+ if encodeHash(name,charMap2) == keyhash:
+ keyname = name
+ break
+ if keyname == "unknown":
+ keyname = keyhash
+ encryptedValue = decode(rawdata,charMap2)
+ DB[keyname] = CryptUnprotectData(encryptedValue, "", 0)
+ elif hdr == '/':
+ # else rainier-2-1-1 .kinf file
+ # the .kinf file uses "/" to separate it into records
+ # so remove the trailing "/" to make it easy to use split
+ data = data[:-1]
+ items = data.split('/')
- arr1 = [0]*l
- for i in xrange(len(s)):
- arr1[i%l] ^= ord(s[i])
+ # loop through the item records until all are processed
+ while len(items) > 0:
- crc_bytes = [crc >> 24 & 0xff, crc >> 16 & 0xff, crc >> 8 & 0xff, crc & 0xff]
- for i in xrange(l):
- arr1[i] ^= crc_bytes[i&3]
+ # get the first item record
+ item = items.pop(0)
- pid = ''
- for i in xrange(l):
- b = arr1[i] & 0xff
- pid+=letters[(b >> 7) + ((b >> 5 & 3) ^ (b & 0x1f))]
+ # the first 32 chars of the first record of a group
+ # is the MD5 hash of the key name encoded by charMap5
+ keyhash = item[0:32]
+
+ # the raw keyhash string is used to create entropy for the actual
+ # CryptProtectData Blob that represents that key's contents
+ entropy = SHA1(keyhash)
+
+ # the remainder of the first record when decoded with charMap5
+ # has the ':' split char followed by the string representation
+ # of the number of records that follow
+ # and make up the contents
+ srcnt = decode(item[34:],charMap5)
+ rcnt = int(srcnt)
+
+ # read and store in rcnt records of data
+ # that make up the contents value
+ edlst = []
+ for i in xrange(rcnt):
+ item = items.pop(0)
+ edlst.append(item)
+
+ keyname = "unknown"
+ for name in names:
+ if encodeHash(name,charMap5) == keyhash:
+ keyname = name
+ break
+ if keyname == "unknown":
+ keyname = keyhash
+ # the charMap5 encoded contents data has had a length
+ # of chars (always odd) cut off of the front and moved
+ # to the end to prevent decoding using charMap5 from
+ # working properly, and thereby preventing the ensuing
+ # CryptUnprotectData call from succeeding.
+
+ # The offset into the charMap5 encoded contents seems to be:
+ # len(contents)-largest prime number <= int(len(content)/3)
+ # (in other words split "about" 2/3rds of the way through)
+
+ # move first offsets chars to end to align for decode by charMap5
+ encdata = "".join(edlst)
+ contlen = len(encdata)
+ noffset = contlen - primes(int(contlen/3))[-1]
+
+ # now properly split and recombine
+ # by moving noffset chars from the start of the
+ # string to the end of the string
+ pfx = encdata[0:noffset]
+ encdata = encdata[noffset:]
+ encdata = encdata + pfx
+
+ # decode using Map5 to get the CryptProtect Data
+ encryptedValue = decode(encdata,charMap5)
+ DB[keyname] = CryptUnprotectData(encryptedValue, entropy, 1)
+ else:
+ # else newest .kinf2011 style .kinf file
+ # the .kinf file uses "/" to separate it into records
+ # so remove the trailing "/" to make it easy to use split
+ # need to put back the first char read because it is part
+ # of the added entropy blob
+ data = hdr + data[:-1]
+ items = data.split('/')
+
+ # starts with and encoded and encrypted header blob
+ headerblob = items.pop(0)
+ encryptedValue = decode(headerblob, testMap1)
+ cleartext = UnprotectHeaderData(encryptedValue)
+ # now extract the pieces that form the added entropy
+ pattern = re.compile(r'''\[Version:(\d+)\]\[Build:(\d+)\]\[Cksum:([^\]]+)\]\[Guid:([\{\}a-z0-9\-]+)\]''', re.IGNORECASE)
+ for m in re.finditer(pattern, cleartext):
+ added_entropy = m.group(2) + m.group(4)
+
+
+ # loop through the item records until all are processed
+ while len(items) > 0:
+
+ # get the first item record
+ item = items.pop(0)
+
+ # the first 32 chars of the first record of a group
+ # is the MD5 hash of the key name encoded by charMap5
+ keyhash = item[0:32]
+
+ # the sha1 of raw keyhash string is used to create entropy along
+ # with the added entropy provided above from the headerblob
+ entropy = SHA1(keyhash) + added_entropy
+
+ # the remainder of the first record when decoded with charMap5
+ # has the ':' split char followed by the string representation
+ # of the number of records that follow
+ # and make up the contents
+ srcnt = decode(item[34:],charMap5)
+ rcnt = int(srcnt)
+
+ # read and store in rcnt records of data
+ # that make up the contents value
+ edlst = []
+ for i in xrange(rcnt):
+ item = items.pop(0)
+ edlst.append(item)
+
+ # key names now use the new testMap8 encoding
+ keyname = "unknown"
+ for name in names:
+ if encodeHash(name,testMap8) == keyhash:
+ keyname = name
+ break
+
+ # the testMap8 encoded contents data has had a length
+ # of chars (always odd) cut off of the front and moved
+ # to the end to prevent decoding using testMap8 from
+ # working properly, and thereby preventing the ensuing
+ # CryptUnprotectData call from succeeding.
+
+ # The offset into the testMap8 encoded contents seems to be:
+ # len(contents)-largest prime number <= int(len(content)/3)
+ # (in other words split "about" 2/3rds of the way through)
+
+ # move first offsets chars to end to align for decode by testMap8
+ # by moving noffset chars from the start of the
+ # string to the end of the string
+ encdata = "".join(edlst)
+ contlen = len(encdata)
+ noffset = contlen - primes(int(contlen/3))[-1]
+ pfx = encdata[0:noffset]
+ encdata = encdata[noffset:]
+ encdata = encdata + pfx
+
+ # decode using new testMap8 to get the original CryptProtect Data
+ encryptedValue = decode(encdata,testMap8)
+ cleartext = CryptUnprotectData(encryptedValue, entropy, 1)
+ DB[keyname] = cleartext
+
+ if 'kindle.account.tokens' in DB:
+ print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(GetIDString(), GetUserName().decode("latin-1"))
+ # store values used in decryption
+ DB['IDString'] = GetIDString()
+ DB['UserName'] = GetUserName()
+ else:
+ DB = {}
+ return DB
+elif isosx:
+ import copy
+ import subprocess
+
+ # interface to needed routines in openssl's libcrypto
+ def _load_crypto_libcrypto():
+ from ctypes import CDLL, byref, POINTER, c_void_p, c_char_p, c_int, c_long, \
+ Structure, c_ulong, create_string_buffer, addressof, string_at, cast
+ from ctypes.util import find_library
+
+ libcrypto = find_library('crypto')
+ if libcrypto is None:
+ raise DrmException(u"libcrypto not found")
+ libcrypto = CDLL(libcrypto)
+
+ # From OpenSSL's crypto aes header
+ #
+ # AES_ENCRYPT 1
+ # AES_DECRYPT 0
+ # AES_MAXNR 14 (in bytes)
+ # AES_BLOCK_SIZE 16 (in bytes)
+ #
+ # struct aes_key_st {
+ # unsigned long rd_key[4 *(AES_MAXNR + 1)];
+ # int rounds;
+ # };
+ # typedef struct aes_key_st AES_KEY;
+ #
+ # int AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key);
+ #
+ # note: the ivec string, and output buffer are both mutable
+ # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ # const unsigned long length, const AES_KEY *key, unsigned char *ivec, const int enc);
+
+ AES_MAXNR = 14
+ c_char_pp = POINTER(c_char_p)
+ c_int_p = POINTER(c_int)
+
+ class AES_KEY(Structure):
+ _fields_ = [('rd_key', c_long * (4 * (AES_MAXNR + 1))), ('rounds', c_int)]
+ AES_KEY_p = POINTER(AES_KEY)
+
+ def F(restype, name, argtypes):
+ func = getattr(libcrypto, name)
+ func.restype = restype
+ func.argtypes = argtypes
+ return func
+
+ AES_cbc_encrypt = F(None, 'AES_cbc_encrypt',[c_char_p, c_char_p, c_ulong, AES_KEY_p, c_char_p,c_int])
+
+ AES_set_decrypt_key = F(c_int, 'AES_set_decrypt_key',[c_char_p, c_int, AES_KEY_p])
+
+ # From OpenSSL's Crypto evp/p5_crpt2.c
+ #
+ # int PKCS5_PBKDF2_HMAC_SHA1(const char *pass, int passlen,
+ # const unsigned char *salt, int saltlen, int iter,
+ # int keylen, unsigned char *out);
+
+ PKCS5_PBKDF2_HMAC_SHA1 = F(c_int, 'PKCS5_PBKDF2_HMAC_SHA1',
+ [c_char_p, c_ulong, c_char_p, c_ulong, c_ulong, c_ulong, c_char_p])
+
+ class LibCrypto(object):
+ def __init__(self):
+ self._blocksize = 0
+ self._keyctx = None
+ self._iv = 0
+
+ def set_decrypt_key(self, userkey, iv):
+ self._blocksize = len(userkey)
+ if (self._blocksize != 16) and (self._blocksize != 24) and (self._blocksize != 32) :
+ raise DrmException(u"AES improper key used")
+ return
+ keyctx = self._keyctx = AES_KEY()
+ self._iv = iv
+ self._userkey = userkey
+ rv = AES_set_decrypt_key(userkey, len(userkey) * 8, keyctx)
+ if rv < 0:
+ raise DrmException(u"Failed to initialize AES key")
+
+ def decrypt(self, data):
+ out = create_string_buffer(len(data))
+ mutable_iv = create_string_buffer(self._iv, len(self._iv))
+ keyctx = self._keyctx
+ rv = AES_cbc_encrypt(data, out, len(data), keyctx, mutable_iv, 0)
+ if rv == 0:
+ raise DrmException(u"AES decryption failed")
+ return out.raw
+
+ def keyivgen(self, passwd, salt, iter, keylen):
+ saltlen = len(salt)
+ passlen = len(passwd)
+ out = create_string_buffer(keylen)
+ rv = PKCS5_PBKDF2_HMAC_SHA1(passwd, passlen, salt, saltlen, iter, keylen, out)
+ return out.raw
+ return LibCrypto
+
+ def _load_crypto():
+ LibCrypto = None
+ try:
+ LibCrypto = _load_crypto_libcrypto()
+ except (ImportError, DrmException):
+ pass
+ return LibCrypto
+
+ LibCrypto = _load_crypto()
+
+ # Various character maps used to decrypt books. Probably supposed to act as obfuscation
+ charMap1 = 'n5Pr6St7Uv8Wx9YzAb0Cd1Ef2Gh3Jk4M'
+ charMap2 = 'ZB0bYyc1xDdW2wEV3Ff7KkPpL8UuGA4gz-Tme9Nn_tHh5SvXCsIiR6rJjQaqlOoM'
+
+ # For kinf approach of K4Mac 1.6.X or later
+ # On K4PC charMap5 = 'AzB0bYyCeVvaZ3FfUuG4g-TtHh5SsIiR6rJjQq7KkPpL8lOoMm9Nn_c1XxDdW2wE'
+ # For Mac they seem to re-use charMap2 here
+ charMap5 = charMap2
+
+ # new in K4M 1.9.X
+ testMap8 = 'YvaZ3FfUm9Nn_c1XuG4yCAzB0beVg-TtHh5SsIiR6rJjQdW2wEq7KkPpL8lOoMxD'
+
+ # uses a sub process to get the Hard Drive Serial Number using ioreg
+ # returns serial numbers of all internal hard drives
+ def GetVolumesSerialNumbers():
+ sernums = []
+ sernum = os.getenv('MYSERIALNUMBER')
+ if sernum != None:
+ sernums.append(sernum.strip())
+ cmdline = '/usr/sbin/ioreg -w 0 -r -c AppleAHCIDiskDriver'
+ cmdline = cmdline.encode(sys.getfilesystemencoding())
+ p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+ out1, out2 = p.communicate()
+ reslst = out1.split('\n')
+ cnt = len(reslst)
+ bsdname = None
+ sernum = None
+ foundIt = False
+ for j in xrange(cnt):
+ resline = reslst[j]
+ pp = resline.find('\"Serial Number\" = \"')
+ if pp >= 0:
+ sernum = resline[pp+19:-1]
+ sernums.append(sernum.strip())
+ return sernums
+
+ def GetUserHomeAppSupKindleDirParitionName():
+ home = os.getenv('HOME')
+ dpath = home + '/Library'
+ cmdline = '/sbin/mount'
+ cmdline = cmdline.encode(sys.getfilesystemencoding())
+ p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+ out1, out2 = p.communicate()
+ reslst = out1.split('\n')
+ cnt = len(reslst)
+ disk = ''
+ foundIt = False
+ for j in xrange(cnt):
+ resline = reslst[j]
+ if resline.startswith('/dev'):
+ (devpart, mpath) = resline.split(' on ')
+ dpart = devpart[5:]
+ pp = mpath.find('(')
+ if pp >= 0:
+ mpath = mpath[:pp-1]
+ if dpath.startswith(mpath):
+ disk = dpart
+ return disk
+
+ # uses a sub process to get the UUID of the specified disk partition using ioreg
+ def GetDiskPartitionUUIDs(diskpart):
+ uuids = []
+ uuidnum = os.getenv('MYUUIDNUMBER')
+ if uuidnum != None:
+ uuids.append(strip(uuidnum))
+ cmdline = '/usr/sbin/ioreg -l -S -w 0 -r -c AppleAHCIDiskDriver'
+ cmdline = cmdline.encode(sys.getfilesystemencoding())
+ p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+ out1, out2 = p.communicate()
+ reslst = out1.split('\n')
+ cnt = len(reslst)
+ bsdname = None
+ uuidnum = None
+ foundIt = False
+ nest = 0
+ uuidnest = -1
+ partnest = -2
+ for j in xrange(cnt):
+ resline = reslst[j]
+ if resline.find('{') >= 0:
+ nest += 1
+ if resline.find('}') >= 0:
+ nest -= 1
+ pp = resline.find('\"UUID\" = \"')
+ if pp >= 0:
+ uuidnum = resline[pp+10:-1]
+ uuidnum = uuidnum.strip()
+ uuidnest = nest
+ if partnest == uuidnest and uuidnest > 0:
+ foundIt = True
+ break
+ bb = resline.find('\"BSD Name\" = \"')
+ if bb >= 0:
+ bsdname = resline[bb+14:-1]
+ bsdname = bsdname.strip()
+ if (bsdname == diskpart):
+ partnest = nest
+ else :
+ partnest = -2
+ if partnest == uuidnest and partnest > 0:
+ foundIt = True
+ break
+ if nest == 0:
+ partnest = -2
+ uuidnest = -1
+ uuidnum = None
+ bsdname = None
+ if foundIt:
+ uuids.append(uuidnum)
+ return uuids
+
+ def GetMACAddressesMunged():
+ macnums = []
+ macnum = os.getenv('MYMACNUM')
+ if macnum != None:
+ macnums.append(macnum)
+ cmdline = '/sbin/ifconfig en0'
+ cmdline = cmdline.encode(sys.getfilesystemencoding())
+ p = subprocess.Popen(cmdline, shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
+ out1, out2 = p.communicate()
+ reslst = out1.split('\n')
+ cnt = len(reslst)
+ macnum = None
+ foundIt = False
+ for j in xrange(cnt):
+ resline = reslst[j]
+ pp = resline.find('ether ')
+ if pp >= 0:
+ macnum = resline[pp+6:-1]
+ macnum = macnum.strip()
+ # print 'original mac', macnum
+ # now munge it up the way Kindle app does
+ # by xoring it with 0xa5 and swapping elements 3 and 4
+ maclst = macnum.split(':')
+ n = len(maclst)
+ if n != 6:
+ fountIt = False
+ break
+ for i in range(6):
+ maclst[i] = int('0x' + maclst[i], 0)
+ mlst = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
+ mlst[5] = maclst[5] ^ 0xa5
+ mlst[4] = maclst[3] ^ 0xa5
+ mlst[3] = maclst[4] ^ 0xa5
+ mlst[2] = maclst[2] ^ 0xa5
+ mlst[1] = maclst[1] ^ 0xa5
+ mlst[0] = maclst[0] ^ 0xa5
+ macnum = '%0.2x%0.2x%0.2x%0.2x%0.2x%0.2x' % (mlst[0], mlst[1], mlst[2], mlst[3], mlst[4], mlst[5])
+ foundIt = True
+ break
+ if foundIt:
+ macnums.append(macnum)
+ return macnums
+
+
+ # uses unix env to get username instead of using sysctlbyname
+ def GetUserName():
+ username = os.getenv('USER')
+ return username
+
+ def GetIDStrings():
+ # Return all possible ID Strings
+ strings = []
+ strings.extend(GetMACAddressesMunged())
+ strings.extend(GetVolumesSerialNumbers())
+ diskpart = GetUserHomeAppSupKindleDirParitionName()
+ strings.extend(GetDiskPartitionUUIDs(diskpart))
+ strings.append('9999999999')
+ #print strings
+ return strings
+
+
+ # implements a pseudo Mac version of the Windows built-in Crypto routine
+ # used by Kindle for Mac versions < 1.6.0
+ class CryptUnprotectData(object):
+ def __init__(self, IDString):
+ sp = IDString + '!@#' + GetUserName()
+ passwdData = encode(SHA256(sp),charMap1)
+ salt = '16743'
+ self.crp = LibCrypto()
+ iter = 0x3e8
+ keylen = 0x80
+ key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen)
+ self.key = key_iv[0:32]
+ self.iv = key_iv[32:48]
+ self.crp.set_decrypt_key(self.key, self.iv)
+
+ def decrypt(self, encryptedData):
+ cleartext = self.crp.decrypt(encryptedData)
+ cleartext = decode(cleartext,charMap1)
+ return cleartext
+
+
+ # implements a pseudo Mac version of the Windows built-in Crypto routine
+ # used for Kindle for Mac Versions >= 1.6.0
+ class CryptUnprotectDataV2(object):
+ def __init__(self, IDString):
+ sp = GetUserName() + ':&%:' + IDString
+ passwdData = encode(SHA256(sp),charMap5)
+ # salt generation as per the code
+ salt = 0x0512981d * 2 * 1 * 1
+ salt = str(salt) + GetUserName()
+ salt = encode(salt,charMap5)
+ self.crp = LibCrypto()
+ iter = 0x800
+ keylen = 0x400
+ key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen)
+ self.key = key_iv[0:32]
+ self.iv = key_iv[32:48]
+ self.crp.set_decrypt_key(self.key, self.iv)
+
+ def decrypt(self, encryptedData):
+ cleartext = self.crp.decrypt(encryptedData)
+ cleartext = decode(cleartext, charMap5)
+ return cleartext
+
+
+ # unprotect the new header blob in .kinf2011
+ # used in Kindle for Mac Version >= 1.9.0
+ def UnprotectHeaderData(encryptedData):
+ passwdData = 'header_key_data'
+ salt = 'HEADER.2011'
+ iter = 0x80
+ keylen = 0x100
+ crp = LibCrypto()
+ key_iv = crp.keyivgen(passwdData, salt, iter, keylen)
+ key = key_iv[0:32]
+ iv = key_iv[32:48]
+ crp.set_decrypt_key(key,iv)
+ cleartext = crp.decrypt(encryptedData)
+ return cleartext
+
+
+ # implements a pseudo Mac version of the Windows built-in Crypto routine
+ # used for Kindle for Mac Versions >= 1.9.0
+ class CryptUnprotectDataV3(object):
+ def __init__(self, entropy, IDString):
+ sp = GetUserName() + '+@#$%+' + IDString
+ passwdData = encode(SHA256(sp),charMap2)
+ salt = entropy
+ self.crp = LibCrypto()
+ iter = 0x800
+ keylen = 0x400
+ key_iv = self.crp.keyivgen(passwdData, salt, iter, keylen)
+ self.key = key_iv[0:32]
+ self.iv = key_iv[32:48]
+ self.crp.set_decrypt_key(self.key, self.iv)
+
+ def decrypt(self, encryptedData):
+ cleartext = self.crp.decrypt(encryptedData)
+ cleartext = decode(cleartext, charMap2)
+ return cleartext
+
+
+ # Locate the .kindle-info files
+ def getKindleInfoFiles():
+ # file searches can take a long time on some systems, so just look in known specific places.
+ kInfoFiles=[]
+ found = False
+ home = os.getenv('HOME')
+ # check for .kinf2011 file in new location (App Store Kindle for Mac)
+ testpath = home + '/Library/Containers/com.amazon.Kindle/Data/Library/Application Support/Kindle/storage/.kinf2011'
+ if os.path.isfile(testpath):
+ kInfoFiles.append(testpath)
+ print('Found k4Mac kinf2011 file: ' + testpath)
+ found = True
+ # check for .kinf2011 files from 1.10
+ testpath = home + '/Library/Application Support/Kindle/storage/.kinf2011'
+ if os.path.isfile(testpath):
+ kInfoFiles.append(testpath)
+ print('Found k4Mac kinf2011 file: ' + testpath)
+ found = True
+ # check for .rainier-2.1.1-kinf files from 1.6
+ testpath = home + '/Library/Application Support/Kindle/storage/.rainier-2.1.1-kinf'
+ if os.path.isfile(testpath):
+ kInfoFiles.append(testpath)
+ print('Found k4Mac rainier file: ' + testpath)
+ found = True
+ # check for .kindle-info files from 1.4
+ testpath = home + '/Library/Application Support/Kindle/storage/.kindle-info'
+ if os.path.isfile(testpath):
+ kInfoFiles.append(testpath)
+ print('Found k4Mac kindle-info file: ' + testpath)
+ found = True
+ # check for .kindle-info file from 1.2.2
+ testpath = home + '/Library/Application Support/Amazon/Kindle/storage/.kindle-info'
+ if os.path.isfile(testpath):
+ kInfoFiles.append(testpath)
+ print('Found k4Mac kindle-info file: ' + testpath)
+ found = True
+ # check for .kindle-info file from 1.0 beta 1 (27214)
+ testpath = home + '/Library/Application Support/Amazon/Kindle for Mac/storage/.kindle-info'
+ if os.path.isfile(testpath):
+ kInfoFiles.append(testpath)
+ print('Found k4Mac kindle-info file: ' + testpath)
+ found = True
+ if not found:
+ print('No k4Mac kindle-info/rainier/kinf2011 files have been found.')
+ return kInfoFiles
+
+ # determine type of kindle info provided and return a
+ # database of keynames and values
+ def getDBfromFile(kInfoFile):
+ names = ['kindle.account.tokens','kindle.cookie.item','eulaVersionAccepted','login_date','kindle.token.item','login','kindle.key.item','kindle.name.info','kindle.device.info', 'MazamaRandomNumber', 'max_date', 'SIGVERIF']
+ with open(kInfoFile, 'rb') as infoReader:
+ filehdr = infoReader.read(1)
+ filedata = infoReader.read()
+
+ IDStrings = GetIDStrings()
+ for IDString in IDStrings:
+ DB = {}
+ #print "trying IDString:",IDString
+ try:
+ hdr = filehdr
+ data = filedata
+ if data.find('[') != -1 :
+ # older style kindle-info file
+ cud = CryptUnprotectData(IDString)
+ items = data.split('[')
+ for item in items:
+ if item != '':
+ keyhash, rawdata = item.split(':')
+ keyname = 'unknown'
+ for name in names:
+ if encodeHash(name,charMap2) == keyhash:
+ keyname = name
+ break
+ if keyname == 'unknown':
+ keyname = keyhash
+ encryptedValue = decode(rawdata,charMap2)
+ cleartext = cud.decrypt(encryptedValue)
+ if len(cleartext) > 0:
+ DB[keyname] = cleartext
+ if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB:
+ break
+ elif hdr == '/':
+ # else newer style .kinf file used by K4Mac >= 1.6.0
+ # the .kinf file uses '/' to separate it into records
+ # so remove the trailing '/' to make it easy to use split
+ data = data[:-1]
+ items = data.split('/')
+ cud = CryptUnprotectDataV2(IDString)
+
+ # loop through the item records until all are processed
+ while len(items) > 0:
+
+ # get the first item record
+ item = items.pop(0)
+
+ # the first 32 chars of the first record of a group
+ # is the MD5 hash of the key name encoded by charMap5
+ keyhash = item[0:32]
+ keyname = 'unknown'
+
+ # the raw keyhash string is also used to create entropy for the actual
+ # CryptProtectData Blob that represents that key's contents
+ # 'entropy' is not used for K4Mac, only for K4PC
+ # entropy = SHA1(keyhash)
+
+ # the remainder of the first record when decoded with charMap5
+ # has the ':' split char followed by the string representation
+ # of the number of records that follow
+ # and make up the contents
+ srcnt = decode(item[34:],charMap5)
+ rcnt = int(srcnt)
+
+ # read and store in rcnt records of data
+ # that make up the contents value
+ edlst = []
+ for i in xrange(rcnt):
+ item = items.pop(0)
+ edlst.append(item)
+
+ keyname = 'unknown'
+ for name in names:
+ if encodeHash(name,charMap5) == keyhash:
+ keyname = name
+ break
+ if keyname == 'unknown':
+ keyname = keyhash
+
+ # the charMap5 encoded contents data has had a length
+ # of chars (always odd) cut off of the front and moved
+ # to the end to prevent decoding using charMap5 from
+ # working properly, and thereby preventing the ensuing
+ # CryptUnprotectData call from succeeding.
+
+ # The offset into the charMap5 encoded contents seems to be:
+ # len(contents) - largest prime number less than or equal to int(len(content)/3)
+ # (in other words split 'about' 2/3rds of the way through)
+
+ # move first offsets chars to end to align for decode by charMap5
+ encdata = ''.join(edlst)
+ contlen = len(encdata)
+
+ # now properly split and recombine
+ # by moving noffset chars from the start of the
+ # string to the end of the string
+ noffset = contlen - primes(int(contlen/3))[-1]
+ pfx = encdata[0:noffset]
+ encdata = encdata[noffset:]
+ encdata = encdata + pfx
+
+ # decode using charMap5 to get the CryptProtect Data
+ encryptedValue = decode(encdata,charMap5)
+ cleartext = cud.decrypt(encryptedValue)
+ if len(cleartext) > 0:
+ DB[keyname] = cleartext
+
+ if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB:
+ break
+ else:
+ # the latest .kinf2011 version for K4M 1.9.1
+ # put back the hdr char, it is needed
+ data = hdr + data
+ data = data[:-1]
+ items = data.split('/')
+
+ # the headerblob is the encrypted information needed to build the entropy string
+ headerblob = items.pop(0)
+ encryptedValue = decode(headerblob, charMap1)
+ cleartext = UnprotectHeaderData(encryptedValue)
+
+ # now extract the pieces in the same way
+ # this version is different from K4PC: it scales the build number by multiplying by 735
+ pattern = re.compile(r'''\[Version:(\d+)\]\[Build:(\d+)\]\[Cksum:([^\]]+)\]\[Guid:([\{\}a-z0-9\-]+)\]''', re.IGNORECASE)
+ for m in re.finditer(pattern, cleartext):
+ entropy = str(int(m.group(2)) * 0x2df) + m.group(4)
+
+ cud = CryptUnprotectDataV3(entropy,IDString)
+
+ # loop through the item records until all are processed
+ while len(items) > 0:
+
+ # get the first item record
+ item = items.pop(0)
+
+ # the first 32 chars of the first record of a group
+ # is the MD5 hash of the key name encoded by charMap5
+ keyhash = item[0:32]
+ keyname = 'unknown'
+
+ # unlike K4PC the keyhash is not used in generating entropy
+ # entropy = SHA1(keyhash) + added_entropy
+ # entropy = added_entropy
+
+ # the remainder of the first record when decoded with charMap5
+ # has the ':' split char followed by the string representation
+ # of the number of records that follow
+ # and make up the contents
+ srcnt = decode(item[34:],charMap5)
+ rcnt = int(srcnt)
+
+ # read and store in rcnt records of data
+ # that make up the contents value
+ edlst = []
+ for i in xrange(rcnt):
+ item = items.pop(0)
+ edlst.append(item)
+
+ keyname = 'unknown'
+ for name in names:
+ if encodeHash(name,testMap8) == keyhash:
+ keyname = name
+ break
+ if keyname == 'unknown':
+ keyname = keyhash
+
+ # the testMap8 encoded contents data has had a length
+ # of chars (always odd) cut off of the front and moved
+ # to the end to prevent decoding using testMap8 from
+ # working properly, and thereby preventing the ensuing
+ # CryptUnprotectData call from succeeding.
+
+ # The offset into the testMap8 encoded contents seems to be:
+ # len(contents) - largest prime number less than or equal to int(len(content)/3)
+ # (in other words split 'about' 2/3rds of the way through)
+
+ # move first offsets chars to end to align for decode by testMap8
+ encdata = ''.join(edlst)
+ contlen = len(encdata)
+
+ # now properly split and recombine
+ # by moving noffset chars from the start of the
+ # string to the end of the string
+ noffset = contlen - primes(int(contlen/3))[-1]
+ pfx = encdata[0:noffset]
+ encdata = encdata[noffset:]
+ encdata = encdata + pfx
+
+ # decode using testMap8 to get the CryptProtect Data
+ encryptedValue = decode(encdata,testMap8)
+ cleartext = cud.decrypt(encryptedValue)
+ # print keyname
+ # print cleartext
+ if len(cleartext) > 0:
+ DB[keyname] = cleartext
+
+ if 'MazamaRandomNumber' in DB and 'kindle.account.tokens' in DB:
+ break
+ except:
+ pass
+ if 'kindle.account.tokens' in DB:
+ # store values used in decryption
+ print u"Decrypted key file using IDString '{0:s}' and UserName '{1:s}'".format(IDString, GetUserName())
+ DB['IDString'] = IDString
+ DB['UserName'] = GetUserName()
+ else:
+ print u"Couldn't decrypt file."
+ DB = {}
+ return DB
+else:
+ def getDBfromFile(kInfoFile):
+ raise DrmException(u"This script only runs under Windows or Mac OS X.")
+ return {}
+
+def kindlekeys(files = []):
+ keys = []
+ if files == []:
+ files = getKindleInfoFiles()
+ for file in files:
+ key = getDBfromFile(file)
+ if key:
+ # convert all values to hex, just in case.
+ for keyname in key:
+ key[keyname]=key[keyname].encode('hex')
+ keys.append(key)
+ return keys
+
+# interface for Python DeDRM
+# returns single key or multiple keys, depending on path or file passed in
+def getkey(outpath, files=[]):
+ keys = kindlekeys(files)
+ if len(keys) > 0:
+ if not os.path.isdir(outpath):
+ outfile = outpath
+ with file(outfile, 'w') as keyfileout:
+ keyfileout.write(json.dumps(keys[0]))
+ print u"Saved a key to {0}".format(outfile)
+ else:
+ keycount = 0
+ for key in keys:
+ while True:
+ keycount += 1
+ outfile = os.path.join(outpath,u"kindlekey{0:d}.k4i".format(keycount))
+ if not os.path.exists(outfile):
+ break
+ with file(outfile, 'w') as keyfileout:
+ keyfileout.write(json.dumps(key))
+ print u"Saved a key to {0}".format(outfile)
+ return True
+ return False
+
+def usage(progname):
+ print u"Finds, decrypts and saves the default Kindle For Mac/PC encryption keys."
+ print u"Keys are saved to the current directory, or a specified output directory."
+ print u"If a file name is passed instead of a directory, only the first key is saved, in that file."
+ print u"Usage:"
+ print u" {0:s} [-h] [-k <kindle.info>] [<outpath>]".format(progname)
- return pid
def cli_main():
- print u"Mobipocket PID calculator for Amazon Kindle. Copyright © 2007, 2009 Igor Skochinsky"
+ sys.stdout=SafeUnbuffered(sys.stdout)
+ sys.stderr=SafeUnbuffered(sys.stderr)
argv=unicode_argv()
- if len(argv)==2:
- serial = argv[1]
+ progname = os.path.basename(argv[0])
+ print u"{0} v{1}\nCopyright © 2010-2013 some_updates and Apprentice Alf".format(progname,__version__)
+
+ try:
+ opts, args = getopt.getopt(argv[1:], "hk:")
+ except getopt.GetoptError, err:
+ print u"Error in options or arguments: {0}".format(err.args[0])
+ usage(progname)
+ sys.exit(2)
+
+ files = []
+ for o, a in opts:
+ if o == "-h":
+ usage(progname)
+ sys.exit(0)
+ if o == "-k":
+ files = [a]
+
+ if len(args) > 1:
+ usage(progname)
+ sys.exit(2)
+
+ if len(args) == 1:
+ # save to the specified file or directory
+ outpath = args[0]
+ if not os.path.isabs(outpath):
+ outpath = os.path.abspath(outpath)
else:
- print u"Usage: kindlepid.py <Kindle Serial Number>/<iPhone/iPod Touch UDID>"
+ # save to the same directory as the script
+ outpath = os.path.dirname(argv[0])
+
+    # make sure the outpath is a canonical, normalized absolute path
+ outpath = os.path.realpath(os.path.normpath(outpath))
+
+ if not getkey(outpath, files):
+ print u"Could not retrieve Kindle for Mac/PC key."
+ return 0
+
+
+def gui_main():
+ try:
+ import Tkinter
+ import Tkconstants
+ import tkMessageBox
+ import traceback
+ except:
+ return cli_main()
+
+ class ExceptionDialog(Tkinter.Frame):
+ def __init__(self, root, text):
+ Tkinter.Frame.__init__(self, root, border=5)
+ label = Tkinter.Label(self, text=u"Unexpected error:",
+ anchor=Tkconstants.W, justify=Tkconstants.LEFT)
+ label.pack(fill=Tkconstants.X, expand=0)
+ self.text = Tkinter.Text(self)
+ self.text.pack(fill=Tkconstants.BOTH, expand=1)
+
+ self.text.insert(Tkconstants.END, text)
+
+
+ argv=unicode_argv()
+ root = Tkinter.Tk()
+ root.withdraw()
+ progpath, progname = os.path.split(argv[0])
+ success = False
+ try:
+ keys = kindlekeys()
+ keycount = 0
+ for key in keys:
+ while True:
+ keycount += 1
+ outfile = os.path.join(progpath,u"kindlekey{0:d}.k4i".format(keycount))
+ if not os.path.exists(outfile):
+ break
+
+ with file(outfile, 'w') as keyfileout:
+ keyfileout.write(json.dumps(key))
+ success = True
+ tkMessageBox.showinfo(progname, u"Key successfully retrieved to {0}".format(outfile))
+ except DrmException, e:
+ tkMessageBox.showerror(progname, u"Error: {0}".format(str(e)))
+ except Exception:
+ root.wm_state('normal')
+ root.title(progname)
+ text = traceback.format_exc()
+ ExceptionDialog(root, text).pack(fill=Tkconstants.BOTH, expand=1)
+ root.mainloop()
+ if not success:
return 1
- if len(serial)==16:
- if serial.startswith("B") or serial.startswith("9"):
- print u"Kindle serial number detected"
- else:
- print u"Warning: unrecognized serial number. Please recheck input."
- return 1
- pid = pidFromSerial(serial.encode("utf-8"),7)+'*'
- print u"Mobipocket PID for Kindle serial#{0} is {1}".format(serial,checksumPid(pid))
- return 0
- elif len(serial)==40:
- print u"iPhone serial number (UDID) detected"
- pid = pidFromSerial(serial.encode("utf-8"),8)
- print u"Mobipocket PID for iPhone serial#{0} is {1}".format(serial,checksumPid(pid))
- return 0
- print u"Warning: unrecognized serial number. Please recheck input."
- return 1
-
-
-if __name__ == "__main__":
- sys.stdout=SafeUnbuffered(sys.stdout)
- sys.stderr=SafeUnbuffered(sys.stderr)
- sys.exit(cli_main())
+ return 0
+
+if __name__ == '__main__':
+ if len(sys.argv) > 1:
+ sys.exit(cli_main())
+ sys.exit(gui_main())
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/kindlepid.py b/DeDRM_calibre_plugin/DeDRM_plugin/kindlepid.py
index 01c348c..8bbcf69 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/kindlepid.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/kindlepid.py
Binary files differ
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto.dylib b/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto.dylib
index 9a5a442..01c348c 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto.dylib
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto.dylib
Binary files differ
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto32.so b/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto32.so
index a08ac28..9a5a442 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto32.so
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto32.so
Binary files differ
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto64.so b/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto64.so
index 7b69edc..a08ac28 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto64.so
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/libalfcrypto64.so
Binary files differ
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/mobidedrm.py b/DeDRM_calibre_plugin/DeDRM_plugin/mobidedrm.py
index 9a84e58..89cc695 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/mobidedrm.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/mobidedrm.py
@@ -1,89 +1,541 @@
#!/usr/bin/env python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+# -*- coding: utf-8 -*-
-# implement just enough of des from openssl to make erdr2pml.py happy
+# mobidedrm.py, version 0.38
+# Copyright © 2008 The Dark Reverser
+#
+# Modified 2008–2012 by some_updates, DiapDealer and Apprentice Alf
-def load_libcrypto():
- from ctypes import CDLL, POINTER, c_void_p, c_char_p, c_char, c_int, c_long, \
- Structure, c_ulong, create_string_buffer, cast
- from ctypes.util import find_library
- import sys
+# This is a python script. You need a Python interpreter to run it.
+# For example, ActiveState Python, which exists for Windows.
+#
+# Changelog
+# 0.01 - Initial version
+# 0.02 - Huffdic compressed books were not properly decrypted
+# 0.03 - Wasn't checking MOBI header length
+# 0.04 - Wasn't sanity checking size of data record
+# 0.05 - It seems that the extra data flags take two bytes not four
+# 0.06 - And that low bit does mean something after all :-)
+# 0.07 - The extra data flags aren't present in MOBI header < 0xE8 in size
+# 0.08 - ...and also not in Mobi header version < 6
+# 0.09 - ...but they are there with Mobi header version 6, header size 0xE4!
+# 0.10 - Outputs unencrypted files as-is, so that when run as a Calibre
+# import filter it works when importing unencrypted files.
+# Also now handles encrypted files that don't need a specific PID.
+# 0.11 - use autoflushed stdout and proper return values
+# 0.12 - Fix for problems with metadata import as Calibre plugin, report errors
+# 0.13 - Formatting fixes: retabbed file, removed trailing whitespace
+# and extra blank lines, converted CR/LF pairs at ends of each line,
+# and other cosmetic fixes.
+# 0.14 - Working out when the extra data flags are present has been problematic
+# Versions 7 through 9 have tried to tweak the conditions, but have been
+# only partially successful. Closer examination of lots of sample
+# files reveals that a confusion has arisen because trailing data entries
+# are not encrypted, but it turns out that the multibyte entries
+#         in utf8 files are encrypted. (Although neither kind gets compressed.)
+# This knowledge leads to a simplification of the test for the
+# trailing data byte flags - version 5 and higher AND header size >= 0xE4.
+# 0.15 - Now outputs 'heartbeat', and is also quicker for long files.
+# 0.16 - And reverts to 'done' not 'done.' at the end for unswindle compatibility.
+# 0.17 - added modifications to support its use as an imported python module
+# both inside calibre and also in other places (ie K4DeDRM tools)
+# 0.17a- disabled the standalone plugin feature since a plugin can not import
+# a plugin
+# 0.18 - It seems that multibyte entries aren't encrypted in a v7 file...
+# Removed the disabled Calibre plug-in code
+# Permit use of 8-digit PIDs
+# 0.19 - It seems that multibyte entries aren't encrypted in a v6 file either.
+# 0.20 - Correction: It seems that multibyte entries are encrypted in a v6 file.
+# 0.21 - Added support for multiple pids
+# 0.22 - revised structure to hold MobiBook as a class to allow an extended interface
+# 0.23 - fixed problem with older files with no EXTH section
+# 0.24 - add support for type 1 encryption and 'TEXtREAd' books as well
+# 0.25 - Fixed support for 'BOOKMOBI' type 1 encryption
+# 0.26 - Now enables Text-To-Speech flag and sets clipping limit to 100%
+# 0.27 - Correct pid metadata token generation to match that used by skindle (Thank You Bart!)
+# 0.28 - slight additional changes to metadata token generation (None -> '')
+# 0.29 - It seems that the ideas about when multibyte trailing characters were
+# included in the encryption were wrong. They are for DOC compressed
+# files, but they are not for HUFF/CDIC compress files!
+# 0.30 - Modified interface slightly to work better with new calibre plugin style
+#  0.31 - The multibyte encryption info is true for version 7 files too.
+# 0.32 - Added support for "Print Replica" Kindle ebooks
+# 0.33 - Performance improvements for large files (concatenation)
+# 0.34 - Performance improvements in decryption (libalfcrypto)
+# 0.35 - add interface to get mobi_version
+# 0.36 - fixed problem with TEXtREAd and getBookTitle interface
+# 0.37 - Fixed double announcement for stand-alone operation
+# 0.38 - Unicode used wherever possible, cope with absent alfcrypto
+# 0.39 - Fixed problem with TEXtREAd and getBookType interface
+# 0.40 - moved unicode_argv call inside main for Windows DeDRM compatibility
+# 0.41 - Fixed potential unicode problem in command line calls
- if sys.platform.startswith('win'):
- libcrypto = find_library('libeay32')
+
+__version__ = u"0.41"
+
+import sys
+import os
+import struct
+import binascii
+try:
+ from alfcrypto import Pukall_Cipher
+except:
+ print u"AlfCrypto not found. Using python PC1 implementation."
+
+# Wrap a stream so that output gets flushed immediately
+# and also make sure that any unicode strings get
+# encoded using "replace" before writing them.
+class SafeUnbuffered:
+ def __init__(self, stream):
+ self.stream = stream
+ self.encoding = stream.encoding
+ if self.encoding == None:
+ self.encoding = "utf-8"
+ def write(self, data):
+ if isinstance(data,unicode):
+ data = data.encode(self.encoding,"replace")
+ self.stream.write(data)
+ self.stream.flush()
+ def __getattr__(self, attr):
+ return getattr(self.stream, attr)
+
+iswindows = sys.platform.startswith('win')
+isosx = sys.platform.startswith('darwin')
+
+def unicode_argv():
+ if iswindows:
+ # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
+ # strings.
+
+ # Versions 2.x of Python don't support Unicode in sys.argv on
+ # Windows, with the underlying Windows API instead replacing multi-byte
+ # characters with '?'.
+
+
+ from ctypes import POINTER, byref, cdll, c_int, windll
+ from ctypes.wintypes import LPCWSTR, LPWSTR
+
+ GetCommandLineW = cdll.kernel32.GetCommandLineW
+ GetCommandLineW.argtypes = []
+ GetCommandLineW.restype = LPCWSTR
+
+ CommandLineToArgvW = windll.shell32.CommandLineToArgvW
+ CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
+ CommandLineToArgvW.restype = POINTER(LPWSTR)
+
+ cmd = GetCommandLineW()
+ argc = c_int(0)
+ argv = CommandLineToArgvW(cmd, byref(argc))
+ if argc.value > 0:
+ # Remove Python executable and commands if present
+ start = argc.value - len(sys.argv)
+ return [argv[i] for i in
+ xrange(start, argc.value)]
+ # if we don't have any arguments at all, just pass back script name
+ # this should never happen
+ return [u"mobidedrm.py"]
+ else:
+ argvencoding = sys.stdin.encoding
+ if argvencoding == None:
+ argvencoding = 'utf-8'
+ return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
+
+
+class DrmException(Exception):
+ pass
+
+
+#
+# MobiBook Utility Routines
+#
+
+# Implementation of Pukall Cipher 1
+def PC1(key, src, decryption=True):
+ # if we can get it from alfcrypto, use that
+ try:
+ return Pukall_Cipher().PC1(key,src,decryption)
+ except NameError:
+ pass
+ except TypeError:
+ pass
+
+ # use slow python version, since Pukall_Cipher didn't load
+ sum1 = 0;
+ sum2 = 0;
+ keyXorVal = 0;
+ if len(key)!=16:
+ DrmException (u"PC1: Bad key length")
+ wkey = []
+ for i in xrange(8):
+ wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1]))
+ dst = ""
+ for i in xrange(len(src)):
+ temp1 = 0;
+ byteXorVal = 0;
+ for j in xrange(8):
+ temp1 ^= wkey[j]
+ sum2 = (sum2+j)*20021 + sum1
+ sum1 = (temp1*346)&0xFFFF
+ sum2 = (sum2+sum1)&0xFFFF
+ temp1 = (temp1*20021+1)&0xFFFF
+ byteXorVal ^= temp1 ^ sum2
+ curByte = ord(src[i])
+ if not decryption:
+ keyXorVal = curByte * 257;
+ curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF
+ if decryption:
+ keyXorVal = curByte * 257;
+ for j in xrange(8):
+ wkey[j] ^= keyXorVal;
+ dst+=chr(curByte)
+ return dst
+
+def checksumPid(s):
+ letters = 'ABCDEFGHIJKLMNPQRSTUVWXYZ123456789'
+ crc = (~binascii.crc32(s,-1))&0xFFFFFFFF
+ crc = crc ^ (crc >> 16)
+ res = s
+ l = len(letters)
+ for i in (0,1):
+ b = crc & 0xff
+ pos = (b // l) ^ (b % l)
+ res += letters[pos%l]
+ crc >>= 8
+ return res
+
+def getSizeOfTrailingDataEntries(ptr, size, flags):
+ def getSizeOfTrailingDataEntry(ptr, size):
+ bitpos, result = 0, 0
+ if size <= 0:
+ return result
+ while True:
+ v = ord(ptr[size-1])
+ result |= (v & 0x7F) << bitpos
+ bitpos += 7
+ size -= 1
+ if (v & 0x80) != 0 or (bitpos >= 28) or (size == 0):
+ return result
+ num = 0
+ testflags = flags >> 1
+ while testflags:
+ if testflags & 1:
+ num += getSizeOfTrailingDataEntry(ptr, size - num)
+ testflags >>= 1
+ # Check the low bit to see if there's multibyte data present.
+    # if multibyte data is included in the encrypted data, we'll
+ # have already cleared this flag.
+ if flags & 1:
+ num += (ord(ptr[size - num - 1]) & 0x3) + 1
+ return num
+
+
+
+class MobiBook:
+ def loadSection(self, section):
+ if (section + 1 == self.num_sections):
+ endoff = len(self.data_file)
+ else:
+ endoff = self.sections[section + 1][0]
+ off = self.sections[section][0]
+ return self.data_file[off:endoff]
+
+ def cleanup(self):
+ # to match function in Topaz book
+ pass
+
+ def __init__(self, infile):
+ print u"MobiDeDrm v{0:s}.\nCopyright © 2008-2012 The Dark Reverser et al.".format(__version__)
+
+ try:
+ from alfcrypto import Pukall_Cipher
+ except:
+ print u"AlfCrypto not found. Using python PC1 implementation."
+
+ # initial sanity check on file
+ self.data_file = file(infile, 'rb').read()
+ self.mobi_data = ''
+ self.header = self.data_file[0:78]
+ if self.header[0x3C:0x3C+8] != 'BOOKMOBI' and self.header[0x3C:0x3C+8] != 'TEXtREAd':
+ raise DrmException(u"Invalid file format")
+ self.magic = self.header[0x3C:0x3C+8]
+ self.crypto_type = -1
+
+ # build up section offset and flag info
+ self.num_sections, = struct.unpack('>H', self.header[76:78])
+ self.sections = []
+ for i in xrange(self.num_sections):
+ offset, a1,a2,a3,a4 = struct.unpack('>LBBBB', self.data_file[78+i*8:78+i*8+8])
+ flags, val = a1, a2<<16|a3<<8|a4
+ self.sections.append( (offset, flags, val) )
+
+ # parse information from section 0
+ self.sect = self.loadSection(0)
+ self.records, = struct.unpack('>H', self.sect[0x8:0x8+2])
+ self.compression, = struct.unpack('>H', self.sect[0x0:0x0+2])
+
+        # set default values before PalmDoc test
+ self.print_replica = False
+ self.extra_data_flags = 0
+ self.meta_array = {}
+ self.mobi_length = 0
+ self.mobi_codepage = 1252
+ self.mobi_version = -1
+
+ if self.magic == 'TEXtREAd':
+ print u"PalmDoc format book detected."
+ return
+
+ self.mobi_length, = struct.unpack('>L',self.sect[0x14:0x18])
+ self.mobi_codepage, = struct.unpack('>L',self.sect[0x1c:0x20])
+ self.mobi_version, = struct.unpack('>L',self.sect[0x68:0x6C])
+ print u"MOBI header version {0:d}, header length {1:d}".format(self.mobi_version, self.mobi_length)
+ if (self.mobi_length >= 0xE4) and (self.mobi_version >= 5):
+ self.extra_data_flags, = struct.unpack('>H', self.sect[0xF2:0xF4])
+ print u"Extra Data Flags: {0:d}".format(self.extra_data_flags)
+ if (self.compression != 17480):
+ # multibyte utf8 data is included in the encryption for PalmDoc compression
+ # so clear that byte so that we leave it to be decrypted.
+ self.extra_data_flags &= 0xFFFE
+
+ # if exth region exists parse it for metadata array
+ try:
+ exth_flag, = struct.unpack('>L', self.sect[0x80:0x84])
+ exth = ''
+ if exth_flag & 0x40:
+ exth = self.sect[16 + self.mobi_length:]
+ if (len(exth) >= 12) and (exth[:4] == 'EXTH'):
+ nitems, = struct.unpack('>I', exth[8:12])
+ pos = 12
+ for i in xrange(nitems):
+ type, size = struct.unpack('>II', exth[pos: pos + 8])
+ content = exth[pos + 8: pos + size]
+ self.meta_array[type] = content
+ # reset the text to speech flag and clipping limit, if present
+ if type == 401 and size == 9:
+ # set clipping limit to 100%
+ self.patchSection(0, '\144', 16 + self.mobi_length + pos + 8)
+ elif type == 404 and size == 9:
+ # make sure text to speech is enabled
+ self.patchSection(0, '\0', 16 + self.mobi_length + pos + 8)
+ # print type, size, content, content.encode('hex')
+ pos += size
+ except:
+ pass
+
+ def getBookTitle(self):
+ codec_map = {
+ 1252 : 'windows-1252',
+ 65001 : 'utf-8',
+ }
+ title = ''
+ codec = 'windows-1252'
+ if self.magic == 'BOOKMOBI':
+ if 503 in self.meta_array:
+ title = self.meta_array[503]
+ else:
+ toff, tlen = struct.unpack('>II', self.sect[0x54:0x5c])
+ tend = toff + tlen
+ title = self.sect[toff:tend]
+ if self.mobi_codepage in codec_map.keys():
+ codec = codec_map[self.mobi_codepage]
+ if title == '':
+ title = self.header[:32]
+ title = title.split('\0')[0]
+ return unicode(title, codec)
+
+ def getPIDMetaInfo(self):
+ rec209 = ''
+ token = ''
+ if 209 in self.meta_array:
+ rec209 = self.meta_array[209]
+ data = rec209
+ # The 209 data comes in five byte groups. Interpret the last four bytes
+ # of each group as a big endian unsigned integer to get a key value
+ # if that key exists in the meta_array, append its contents to the token
+ for i in xrange(0,len(data),5):
+ val, = struct.unpack('>I',data[i+1:i+5])
+ sval = self.meta_array.get(val,'')
+ token += sval
+ return rec209, token
+
+ def patch(self, off, new):
+ self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]
+
+ def patchSection(self, section, new, in_off = 0):
+ if (section + 1 == self.num_sections):
+ endoff = len(self.data_file)
+ else:
+ endoff = self.sections[section + 1][0]
+ off = self.sections[section][0]
+ assert off + in_off + len(new) <= endoff
+ self.patch(off + in_off, new)
+
+ def parseDRM(self, data, count, pidlist):
+ found_key = None
+ keyvec1 = '\x72\x38\x33\xB0\xB4\xF2\xE3\xCA\xDF\x09\x01\xD6\xE2\xE0\x3F\x96'
+ for pid in pidlist:
+ bigpid = pid.ljust(16,'\0')
+ temp_key = PC1(keyvec1, bigpid, False)
+ temp_key_sum = sum(map(ord,temp_key)) & 0xff
+ found_key = None
+ for i in xrange(count):
+ verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
+ if cksum == temp_key_sum:
+ cookie = PC1(temp_key, cookie)
+ ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
+ if verification == ver and (flags & 0x1F) == 1:
+ found_key = finalkey
+ break
+ if found_key != None:
+ break
+ if not found_key:
+ # Then try the default encoding that doesn't require a PID
+ pid = '00000000'
+ temp_key = keyvec1
+ temp_key_sum = sum(map(ord,temp_key)) & 0xff
+ for i in xrange(count):
+ verification, size, type, cksum, cookie = struct.unpack('>LLLBxxx32s', data[i*0x30:i*0x30+0x30])
+ if cksum == temp_key_sum:
+ cookie = PC1(temp_key, cookie)
+ ver,flags,finalkey,expiry,expiry2 = struct.unpack('>LL16sLL', cookie)
+ if verification == ver:
+ found_key = finalkey
+ break
+ return [found_key,pid]
+
+ def getFile(self, outpath):
+ file(outpath,'wb').write(self.mobi_data)
+
+ def getBookType(self):
+ if self.print_replica:
+ return u"Print Replica"
+ if self.mobi_version >= 8:
+ return u"Kindle Format 8"
+ if self.mobi_version >= 0:
+ return u"Mobipocket {0:d}".format(self.mobi_version)
+ return u"PalmDoc"
+
+ def getBookExtension(self):
+ if self.print_replica:
+ return u".azw4"
+ if self.mobi_version >= 8:
+ return u".azw3"
+ return u".mobi"
+
+ def processBook(self, pidlist):
+ crypto_type, = struct.unpack('>H', self.sect[0xC:0xC+2])
+ print u"Crypto Type is: {0:d}".format(crypto_type)
+ self.crypto_type = crypto_type
+ if crypto_type == 0:
+ print u"This book is not encrypted."
+ # we must still check for Print Replica
+ self.print_replica = (self.loadSection(1)[0:4] == '%MOP')
+ self.mobi_data = self.data_file
+ return
+ if crypto_type != 2 and crypto_type != 1:
+ raise DrmException(u"Cannot decode unknown Mobipocket encryption type {0:d}".format(crypto_type))
+ if 406 in self.meta_array:
+ data406 = self.meta_array[406]
+ val406, = struct.unpack('>Q',data406)
+ if val406 != 0:
+ raise DrmException(u"Cannot decode library or rented ebooks.")
+
+ goodpids = []
+ for pid in pidlist:
+ if len(pid)==10:
+ if checksumPid(pid[0:-2]) != pid:
+ print u"Warning: PID {0} has incorrect checksum, should have been {1}".format(pid,checksumPid(pid[0:-2]))
+ goodpids.append(pid[0:-2])
+ elif len(pid)==8:
+ goodpids.append(pid)
+
+ if self.crypto_type == 1:
+ t1_keyvec = 'QDCVEPMU675RUBSZ'
+ if self.magic == 'TEXtREAd':
+ bookkey_data = self.sect[0x0E:0x0E+16]
+ elif self.mobi_version < 0:
+ bookkey_data = self.sect[0x90:0x90+16]
+ else:
+ bookkey_data = self.sect[self.mobi_length+16:self.mobi_length+32]
+ pid = '00000000'
+ found_key = PC1(t1_keyvec, bookkey_data)
+ else :
+ # calculate the keys
+ drm_ptr, drm_count, drm_size, drm_flags = struct.unpack('>LLLL', self.sect[0xA8:0xA8+16])
+ if drm_count == 0:
+ raise DrmException(u"Encryption not initialised. Must be opened with Mobipocket Reader first.")
+ found_key, pid = self.parseDRM(self.sect[drm_ptr:drm_ptr+drm_size], drm_count, goodpids)
+ if not found_key:
+ raise DrmException(u"No key found in {0:d} keys tried.".format(len(goodpids)))
+ # kill the drm keys
+ self.patchSection(0, '\0' * drm_size, drm_ptr)
+ # kill the drm pointers
+ self.patchSection(0, '\xff' * 4 + '\0' * 12, 0xA8)
+
+ if pid=='00000000':
+ print u"File has default encryption, no specific key needed."
+ else:
+ print u"File is encoded with PID {0}.".format(checksumPid(pid))
+
+ # clear the crypto type
+ self.patchSection(0, "\0" * 2, 0xC)
+
+ # decrypt sections
+ print u"Decrypting. Please wait . . .",
+ mobidataList = []
+ mobidataList.append(self.data_file[:self.sections[1][0]])
+ for i in xrange(1, self.records+1):
+ data = self.loadSection(i)
+ extra_size = getSizeOfTrailingDataEntries(data, len(data), self.extra_data_flags)
+ if i%100 == 0:
+ print u".",
+ # print "record %d, extra_size %d" %(i,extra_size)
+ decoded_data = PC1(found_key, data[0:len(data) - extra_size])
+ if i==1:
+ self.print_replica = (decoded_data[0:4] == '%MOP')
+ mobidataList.append(decoded_data)
+ if extra_size > 0:
+ mobidataList.append(data[-extra_size:])
+ if self.num_sections > self.records+1:
+ mobidataList.append(self.data_file[self.sections[self.records+1][0]:])
+ self.mobi_data = "".join(mobidataList)
+ print u"done"
+ return
+
+def getUnencryptedBook(infile,pidlist):
+ if not os.path.isfile(infile):
+ raise DrmException(u"Input File Not Found.")
+ book = MobiBook(infile)
+ book.processBook(pidlist)
+ return book.mobi_data
+
+
+def cli_main():
+ argv=unicode_argv()
+ progname = os.path.basename(argv[0])
+ if len(argv)<3 or len(argv)>4:
+ print u"MobiDeDrm v{0}.\nCopyright © 2008-2012 The Dark Reverser et al.".format(__version__)
+ print u"Removes protection from Kindle/Mobipocket, Kindle/KF8 and Kindle/Print Replica ebooks"
+ print u"Usage:"
+ print u" {0} <infile> <outfile> [<Comma separated list of PIDs to try>]".format(progname)
+ return 1
else:
- libcrypto = find_library('crypto')
-
- if libcrypto is None:
- return None
-
- libcrypto = CDLL(libcrypto)
-
- # typedef struct DES_ks
- # {
- # union
- # {
- # DES_cblock cblock;
- # /* make sure things are correct size on machines with
- # * 8 byte longs */
- # DES_LONG deslong[2];
- # } ks[16];
- # } DES_key_schedule;
-
- # just create a big enough place to hold everything
- # it will have alignment of structure so we should be okay (16 byte aligned?)
- class DES_KEY_SCHEDULE(Structure):
- _fields_ = [('DES_cblock1', c_char * 16),
- ('DES_cblock2', c_char * 16),
- ('DES_cblock3', c_char * 16),
- ('DES_cblock4', c_char * 16),
- ('DES_cblock5', c_char * 16),
- ('DES_cblock6', c_char * 16),
- ('DES_cblock7', c_char * 16),
- ('DES_cblock8', c_char * 16),
- ('DES_cblock9', c_char * 16),
- ('DES_cblock10', c_char * 16),
- ('DES_cblock11', c_char * 16),
- ('DES_cblock12', c_char * 16),
- ('DES_cblock13', c_char * 16),
- ('DES_cblock14', c_char * 16),
- ('DES_cblock15', c_char * 16),
- ('DES_cblock16', c_char * 16)]
-
- DES_KEY_SCHEDULE_p = POINTER(DES_KEY_SCHEDULE)
-
- def F(restype, name, argtypes):
- func = getattr(libcrypto, name)
- func.restype = restype
- func.argtypes = argtypes
- return func
-
- DES_set_key = F(None, 'DES_set_key',[c_char_p, DES_KEY_SCHEDULE_p])
- DES_ecb_encrypt = F(None, 'DES_ecb_encrypt',[c_char_p, c_char_p, DES_KEY_SCHEDULE_p, c_int])
-
-
- class DES(object):
- def __init__(self, key):
- if len(key) != 8 :
- raise Exception('DES improper key used')
- return
- self.key = key
- self.keyschedule = DES_KEY_SCHEDULE()
- DES_set_key(self.key, self.keyschedule)
- def desdecrypt(self, data):
- ob = create_string_buffer(len(data))
- DES_ecb_encrypt(data, ob, self.keyschedule, 0)
- return ob.raw
- def decrypt(self, data):
- if not data:
- return ''
- i = 0
- result = []
- while i < len(data):
- block = data[i:i+8]
- processed_block = self.desdecrypt(block)
- result.append(processed_block)
- i += 8
- return ''.join(result)
-
- return DES
+ infile = argv[1]
+ outfile = argv[2]
+ if len(argv) is 4:
+ pidlist = argv[3].split(',')
+ else:
+ pidlist = []
+ try:
+ stripped_file = getUnencryptedBook(infile, pidlist)
+ file(outfile, 'wb').write(stripped_file)
+ except DrmException, e:
+ print u"MobiDeDRM v{0} Error: {0:s}".format(__version__,e.args[0])
+ return 1
+ return 0
+
+
+if __name__ == '__main__':
+ sys.stdout=SafeUnbuffered(sys.stdout)
+ sys.stderr=SafeUnbuffered(sys.stderr)
+ sys.exit(cli_main())
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/openssl_des.py b/DeDRM_calibre_plugin/DeDRM_plugin/openssl_des.py
index e69de29..9a84e58 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/openssl_des.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/openssl_des.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+# implement just enough of des from openssl to make erdr2pml.py happy
+
+def load_libcrypto():
+ from ctypes import CDLL, POINTER, c_void_p, c_char_p, c_char, c_int, c_long, \
+ Structure, c_ulong, create_string_buffer, cast
+ from ctypes.util import find_library
+ import sys
+
+ if sys.platform.startswith('win'):
+ libcrypto = find_library('libeay32')
+ else:
+ libcrypto = find_library('crypto')
+
+ if libcrypto is None:
+ return None
+
+ libcrypto = CDLL(libcrypto)
+
+ # typedef struct DES_ks
+ # {
+ # union
+ # {
+ # DES_cblock cblock;
+ # /* make sure things are correct size on machines with
+ # * 8 byte longs */
+ # DES_LONG deslong[2];
+ # } ks[16];
+ # } DES_key_schedule;
+
+ # just create a big enough place to hold everything
+ # it will have alignment of structure so we should be okay (16 byte aligned?)
+ class DES_KEY_SCHEDULE(Structure):
+ _fields_ = [('DES_cblock1', c_char * 16),
+ ('DES_cblock2', c_char * 16),
+ ('DES_cblock3', c_char * 16),
+ ('DES_cblock4', c_char * 16),
+ ('DES_cblock5', c_char * 16),
+ ('DES_cblock6', c_char * 16),
+ ('DES_cblock7', c_char * 16),
+ ('DES_cblock8', c_char * 16),
+ ('DES_cblock9', c_char * 16),
+ ('DES_cblock10', c_char * 16),
+ ('DES_cblock11', c_char * 16),
+ ('DES_cblock12', c_char * 16),
+ ('DES_cblock13', c_char * 16),
+ ('DES_cblock14', c_char * 16),
+ ('DES_cblock15', c_char * 16),
+ ('DES_cblock16', c_char * 16)]
+
+ DES_KEY_SCHEDULE_p = POINTER(DES_KEY_SCHEDULE)
+
+ def F(restype, name, argtypes):
+ func = getattr(libcrypto, name)
+ func.restype = restype
+ func.argtypes = argtypes
+ return func
+
+ DES_set_key = F(None, 'DES_set_key',[c_char_p, DES_KEY_SCHEDULE_p])
+ DES_ecb_encrypt = F(None, 'DES_ecb_encrypt',[c_char_p, c_char_p, DES_KEY_SCHEDULE_p, c_int])
+
+
+ class DES(object):
+ def __init__(self, key):
+ if len(key) != 8 :
+ raise Exception('DES improper key used')
+ return
+ self.key = key
+ self.keyschedule = DES_KEY_SCHEDULE()
+ DES_set_key(self.key, self.keyschedule)
+ def desdecrypt(self, data):
+ ob = create_string_buffer(len(data))
+ DES_ecb_encrypt(data, ob, self.keyschedule, 0)
+ return ob.raw
+ def decrypt(self, data):
+ if not data:
+ return ''
+ i = 0
+ result = []
+ while i < len(data):
+ block = data[i:i+8]
+ processed_block = self.desdecrypt(block)
+ result.append(processed_block)
+ i += 8
+ return ''.join(result)
+
+ return DES
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/plugin-import-name-dedrm.txt b/DeDRM_calibre_plugin/DeDRM_plugin/plugin-import-name-dedrm.txt
index 05065ac..e69de29 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/plugin-import-name-dedrm.txt
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/plugin-import-name-dedrm.txt
@@ -1,292 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
-
-from __future__ import with_statement
-__license__ = 'GPL v3'
-
-# Standard Python modules.
-import os, sys, re, hashlib
-import json
-import traceback
-
-from calibre.utils.config import dynamic, config_dir, JSONConfig
-from calibre_plugins.dedrm.__init__ import PLUGIN_NAME, PLUGIN_VERSION
-from calibre.constants import iswindows, isosx
-
-class DeDRM_Prefs():
- def __init__(self):
- JSON_PATH = os.path.join(u"plugins", PLUGIN_NAME.strip().lower().replace(' ', '_') + '.json')
- self.dedrmprefs = JSONConfig(JSON_PATH)
-
- self.dedrmprefs.defaults['configured'] = False
- self.dedrmprefs.defaults['bandnkeys'] = {}
- self.dedrmprefs.defaults['adeptkeys'] = {}
- self.dedrmprefs.defaults['ereaderkeys'] = {}
- self.dedrmprefs.defaults['kindlekeys'] = {}
- self.dedrmprefs.defaults['pids'] = []
- self.dedrmprefs.defaults['serials'] = []
- self.dedrmprefs.defaults['adobewineprefix'] = ""
- self.dedrmprefs.defaults['kindlewineprefix'] = ""
-
- # initialise
- # we must actually set the prefs that are dictionaries and lists
- # to empty dictionaries and lists, otherwise we are unable to add to them
- # as then it just adds to the (memory only) dedrmprefs.defaults versions!
- if self.dedrmprefs['bandnkeys'] == {}:
- self.dedrmprefs['bandnkeys'] = {}
- if self.dedrmprefs['adeptkeys'] == {}:
- self.dedrmprefs['adeptkeys'] = {}
- if self.dedrmprefs['ereaderkeys'] == {}:
- self.dedrmprefs['ereaderkeys'] = {}
- if self.dedrmprefs['kindlekeys'] == {}:
- self.dedrmprefs['kindlekeys'] = {}
- if self.dedrmprefs['pids'] == []:
- self.dedrmprefs['pids'] = []
- if self.dedrmprefs['serials'] == []:
- self.dedrmprefs['serials'] = []
-
- def __getitem__(self,kind = None):
- if kind is not None:
- return self.dedrmprefs[kind]
- return self.dedrmprefs
-
- def set(self, kind, value):
- self.dedrmprefs[kind] = value
-
- def writeprefs(self,value = True):
- self.dedrmprefs['configured'] = value
-
- def addnamedvaluetoprefs(self, prefkind, keyname, keyvalue):
- try:
- if keyvalue not in self.dedrmprefs[prefkind].values():
- # ensure that the keyname is unique
- # by adding a number (starting with 2) to the name if it is not
- namecount = 1
- newname = keyname
- while newname in self.dedrmprefs[prefkind]:
- namecount += 1
- newname = "{0:s}_{1:d}".format(keyname,namecount)
- # add to the preferences
- self.dedrmprefs[prefkind][newname] = keyvalue
- return (True, newname)
- except:
- traceback.print_exc()
- pass
- return (False, keyname)
-
- def addvaluetoprefs(self, prefkind, prefsvalue):
- # ensure the keyvalue isn't already in the preferences
- try:
- if prefsvalue not in self.dedrmprefs[prefkind]:
- self.dedrmprefs[prefkind].append(prefsvalue)
- return True
- except:
- traceback.print_exc()
- return False
-
-
-def convertprefs(always = False):
-
- def parseIgnobleString(keystuff):
- from calibre_plugins.dedrm.ignoblekeygen import generate_key
- userkeys = []
- ar = keystuff.split(':')
- for keystring in ar:
- try:
- name, ccn = keystring.split(',')
- # Generate Barnes & Noble EPUB user key from name and credit card number.
- keyname = u"{0}_{1}".format(name.strip(),ccn.strip()[-4:])
- keyvalue = generate_key(name, ccn)
- userkeys.append([keyname,keyvalue])
- except Exception, e:
- traceback.print_exc()
- print e.args[0]
- pass
- return userkeys
-
- def parseeReaderString(keystuff):
- from calibre_plugins.dedrm.erdr2pml import getuser_key
- userkeys = []
- ar = keystuff.split(':')
- for keystring in ar:
- try:
- name, cc = keystring.split(',')
- # Generate eReader user key from name and credit card number.
- keyname = u"{0}_{1}".format(name.strip(),cc.strip()[-4:])
- keyvalue = getuser_key(name,cc).encode('hex')
- userkeys.append([keyname,keyvalue])
- except Exception, e:
- traceback.print_exc()
- print e.args[0]
- pass
- return userkeys
-
- def parseKindleString(keystuff):
- pids = []
- serials = []
- ar = keystuff.split(',')
- for keystring in ar:
- keystring = str(keystring).strip().replace(" ","")
- if len(keystring) == 10 or len(keystring) == 8 and keystring not in pids:
- pids.append(keystring)
- elif len(keystring) == 16 and keystring[0] == 'B' and keystring not in serials:
- serials.append(keystring)
- return (pids,serials)
-
- def getConfigFiles(extension, encoding = None):
- # get any files with extension 'extension' in the config dir
- userkeys = []
- files = [f for f in os.listdir(config_dir) if f.endswith(extension)]
- for filename in files:
- try:
- fpath = os.path.join(config_dir, filename)
- key = os.path.splitext(filename)[0]
- value = open(fpath, 'rb').read()
- if encoding is not None:
- value = value.encode(encoding)
- userkeys.append([key,value])
- except:
- traceback.print_exc()
- pass
- return userkeys
-
- dedrmprefs = DeDRM_Prefs()
-
- if (not always) and dedrmprefs['configured']:
- # We've already converted old preferences,
- # and we're not being forced to do it again, so just return
- return
-
-
- print u"{0} v{1}: Importing configuration data from old DeDRM plugins".format(PLUGIN_NAME, PLUGIN_VERSION)
-
- IGNOBLEPLUGINNAME = "Ignoble Epub DeDRM"
- EREADERPLUGINNAME = "eReader PDB 2 PML"
- OLDKINDLEPLUGINNAME = "K4PC, K4Mac, Kindle Mobi and Topaz DeDRM"
-
- # get prefs from older tools
- kindleprefs = JSONConfig(os.path.join(u"plugins", u"K4MobiDeDRM"))
- ignobleprefs = JSONConfig(os.path.join(u"plugins", u"ignoble_epub_dedrm"))
-
- # Handle the old ignoble plugin's customization string by converting the
- # old string to stored keys... get that personal data out of plain sight.
- from calibre.customize.ui import config
- sc = config['plugin_customization']
- val = sc.pop(IGNOBLEPLUGINNAME, None)
- if val is not None:
- print u"{0} v{1}: Converting old Ignoble plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION)
- priorkeycount = len(dedrmprefs['bandnkeys'])
- userkeys = parseIgnobleString(str(val))
- for keypair in userkeys:
- name = keypair[0]
- value = keypair[1]
- dedrmprefs.addnamedvaluetoprefs('bandnkeys', name, value)
- addedkeycount = len(dedrmprefs['bandnkeys'])-priorkeycount
- print u"{0} v{1}: {2:d} Barnes and Noble {3} imported from old Ignoble plugin configuration string".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key" if addedkeycount==1 else u"keys")
- # Make the json write all the prefs to disk
- dedrmprefs.writeprefs(False)
-
- # Handle the old eReader plugin's customization string by converting the
- # old string to stored keys... get that personal data out of plain sight.
- val = sc.pop(EREADERPLUGINNAME, None)
- if val is not None:
- print u"{0} v{1}: Converting old eReader plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION)
- priorkeycount = len(dedrmprefs['ereaderkeys'])
- userkeys = parseeReaderString(str(val))
- for keypair in userkeys:
- name = keypair[0]
- value = keypair[1]
- dedrmprefs.addnamedvaluetoprefs('ereaderkeys', name, value)
- addedkeycount = len(dedrmprefs['ereaderkeys'])-priorkeycount
- print u"{0} v{1}: {2:d} eReader {3} imported from old eReader plugin configuration string".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key" if addedkeycount==1 else u"keys")
- # Make the json write all the prefs to disk
- dedrmprefs.writeprefs(False)
-
- # get old Kindle plugin configuration string
- val = sc.pop(OLDKINDLEPLUGINNAME, None)
- if val is not None:
- print u"{0} v{1}: Converting old Kindle plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION)
- priorpidcount = len(dedrmprefs['pids'])
- priorserialcount = len(dedrmprefs['serials'])
- pids, serials = parseKindleString(val)
- for pid in pids:
- dedrmprefs.addvaluetoprefs('pids',pid)
- for serial in serials:
- dedrmprefs.addvaluetoprefs('serials',serial)
- addedpidcount = len(dedrmprefs['pids']) - priorpidcount
- addedserialcount = len(dedrmprefs['serials']) - priorserialcount
- print u"{0} v{1}: {2:d} {3} and {4:d} {5} imported from old Kindle plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION, addedpidcount, u"PID" if addedpidcount==1 else u"PIDs", addedserialcount, u"serial number" if addedserialcount==1 else u"serial numbers")
- # Make the json write all the prefs to disk
- dedrmprefs.writeprefs(False)
-
- # copy the customisations back into calibre preferences, as we've now removed the nasty plaintext
- config['plugin_customization'] = sc
-
- # get any .b64 files in the config dir
- priorkeycount = len(dedrmprefs['bandnkeys'])
- bandnfilekeys = getConfigFiles('.b64')
- for keypair in bandnfilekeys:
- name = keypair[0]
- value = keypair[1]
- dedrmprefs.addnamedvaluetoprefs('bandnkeys', name, value)
- addedkeycount = len(dedrmprefs['bandnkeys'])-priorkeycount
- if addedkeycount > 0:
- print u"{0} v{1}: {2:d} Barnes and Noble {3} imported from config folder.".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key file" if addedkeycount==1 else u"key files")
- # Make the json write all the prefs to disk
- dedrmprefs.writeprefs(False)
-
- # get any .der files in the config dir
- priorkeycount = len(dedrmprefs['adeptkeys'])
- adeptfilekeys = getConfigFiles('.der','hex')
- for keypair in adeptfilekeys:
- name = keypair[0]
- value = keypair[1]
- dedrmprefs.addnamedvaluetoprefs('adeptkeys', name, value)
- addedkeycount = len(dedrmprefs['adeptkeys'])-priorkeycount
- if addedkeycount > 0:
- print u"{0} v{1}: {2:d} Adobe Adept {3} imported from config folder.".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"keyfile" if addedkeycount==1 else u"keyfiles")
- # Make the json write all the prefs to disk
- dedrmprefs.writeprefs(False)
-
- # get ignoble json prefs
- if 'keys' in ignobleprefs:
- priorkeycount = len(dedrmprefs['bandnkeys'])
- for name in ignobleprefs['keys']:
- value = ignobleprefs['keys'][name]
- dedrmprefs.addnamedvaluetoprefs('bandnkeys', name, value)
- addedkeycount = len(dedrmprefs['bandnkeys']) - priorkeycount
- # no need to delete old prefs, since they contain no recoverable private data
- if addedkeycount > 0:
- print u"{0} v{1}: {2:d} Barnes and Noble {3} imported from Ignoble plugin preferences.".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key" if addedkeycount==1 else u"keys")
- # Make the json write all the prefs to disk
- dedrmprefs.writeprefs(False)
-
- # get kindle json prefs
- priorpidcount = len(dedrmprefs['pids'])
- priorserialcount = len(dedrmprefs['serials'])
- if 'pids' in kindleprefs:
- pids, serials = parseKindleString(kindleprefs['pids'])
- for pid in pids:
- dedrmprefs.addvaluetoprefs('pids',pid)
- if 'serials' in kindleprefs:
- pids, serials = parseKindleString(kindleprefs['serials'])
- for serial in serials:
- dedrmprefs.addvaluetoprefs('serials',serial)
- addedpidcount = len(dedrmprefs['pids']) - priorpidcount
- if addedpidcount > 0:
- print u"{0} v{1}: {2:d} {3} imported from Kindle plugin preferences".format(PLUGIN_NAME, PLUGIN_VERSION, addedpidcount, u"PID" if addedpidcount==1 else u"PIDs")
- addedserialcount = len(dedrmprefs['serials']) - priorserialcount
- if addedserialcount > 0:
- print u"{0} v{1}: {2:d} {3} imported from Kindle plugin preferences".format(PLUGIN_NAME, PLUGIN_VERSION, addedserialcount, u"serial number" if addedserialcount==1 else u"serial numbers")
- try:
- if 'wineprefix' in kindleprefs and kindleprefs['wineprefix'] != "":
- dedrmprefs.set('adobewineprefix',kindleprefs['wineprefix'])
- dedrmprefs.set('kindlewineprefix',kindleprefs['wineprefix'])
- print u"{0} v{1}: WINEPREFIX ‘(2)’ imported from Kindle plugin preferences".format(PLUGIN_NAME, PLUGIN_VERSION, kindleprefs['wineprefix'])
- except:
- traceback.print_exc()
-
-
- # Make the json write all the prefs to disk
- dedrmprefs.writeprefs()
- print u"{0} v{1}: Finished setting up configuration data.".format(PLUGIN_NAME, PLUGIN_VERSION)
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/prefs.py b/DeDRM_calibre_plugin/DeDRM_plugin/prefs.py
new file mode 100644
index 0000000..05065ac
--- /dev/null
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/prefs.py
@@ -0,0 +1,292 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+
+from __future__ import with_statement
+__license__ = 'GPL v3'
+
+# Standard Python modules.
+import os, sys, re, hashlib
+import json
+import traceback
+
+from calibre.utils.config import dynamic, config_dir, JSONConfig
+from calibre_plugins.dedrm.__init__ import PLUGIN_NAME, PLUGIN_VERSION
+from calibre.constants import iswindows, isosx
+
+class DeDRM_Prefs():
+ def __init__(self):
+ JSON_PATH = os.path.join(u"plugins", PLUGIN_NAME.strip().lower().replace(' ', '_') + '.json')
+ self.dedrmprefs = JSONConfig(JSON_PATH)
+
+ self.dedrmprefs.defaults['configured'] = False
+ self.dedrmprefs.defaults['bandnkeys'] = {}
+ self.dedrmprefs.defaults['adeptkeys'] = {}
+ self.dedrmprefs.defaults['ereaderkeys'] = {}
+ self.dedrmprefs.defaults['kindlekeys'] = {}
+ self.dedrmprefs.defaults['pids'] = []
+ self.dedrmprefs.defaults['serials'] = []
+ self.dedrmprefs.defaults['adobewineprefix'] = ""
+ self.dedrmprefs.defaults['kindlewineprefix'] = ""
+
+ # initialise
+ # we must actually set the prefs that are dictionaries and lists
+ # to empty dictionaries and lists, otherwise we are unable to add to them
+ # as then it just adds to the (memory only) dedrmprefs.defaults versions!
+ if self.dedrmprefs['bandnkeys'] == {}:
+ self.dedrmprefs['bandnkeys'] = {}
+ if self.dedrmprefs['adeptkeys'] == {}:
+ self.dedrmprefs['adeptkeys'] = {}
+ if self.dedrmprefs['ereaderkeys'] == {}:
+ self.dedrmprefs['ereaderkeys'] = {}
+ if self.dedrmprefs['kindlekeys'] == {}:
+ self.dedrmprefs['kindlekeys'] = {}
+ if self.dedrmprefs['pids'] == []:
+ self.dedrmprefs['pids'] = []
+ if self.dedrmprefs['serials'] == []:
+ self.dedrmprefs['serials'] = []
+
+ def __getitem__(self,kind = None):
+ if kind is not None:
+ return self.dedrmprefs[kind]
+ return self.dedrmprefs
+
+ def set(self, kind, value):
+ self.dedrmprefs[kind] = value
+
+ def writeprefs(self,value = True):
+ self.dedrmprefs['configured'] = value
+
+ def addnamedvaluetoprefs(self, prefkind, keyname, keyvalue):
+ try:
+ if keyvalue not in self.dedrmprefs[prefkind].values():
+ # ensure that the keyname is unique
+ # by adding a number (starting with 2) to the name if it is not
+ namecount = 1
+ newname = keyname
+ while newname in self.dedrmprefs[prefkind]:
+ namecount += 1
+ newname = "{0:s}_{1:d}".format(keyname,namecount)
+ # add to the preferences
+ self.dedrmprefs[prefkind][newname] = keyvalue
+ return (True, newname)
+ except:
+ traceback.print_exc()
+ pass
+ return (False, keyname)
+
+ def addvaluetoprefs(self, prefkind, prefsvalue):
+ # ensure the keyvalue isn't already in the preferences
+ try:
+ if prefsvalue not in self.dedrmprefs[prefkind]:
+ self.dedrmprefs[prefkind].append(prefsvalue)
+ return True
+ except:
+ traceback.print_exc()
+ return False
+
+
+def convertprefs(always = False):
+
+ def parseIgnobleString(keystuff):
+ from calibre_plugins.dedrm.ignoblekeygen import generate_key
+ userkeys = []
+ ar = keystuff.split(':')
+ for keystring in ar:
+ try:
+ name, ccn = keystring.split(',')
+ # Generate Barnes & Noble EPUB user key from name and credit card number.
+ keyname = u"{0}_{1}".format(name.strip(),ccn.strip()[-4:])
+ keyvalue = generate_key(name, ccn)
+ userkeys.append([keyname,keyvalue])
+ except Exception, e:
+ traceback.print_exc()
+ print e.args[0]
+ pass
+ return userkeys
+
+ def parseeReaderString(keystuff):
+ from calibre_plugins.dedrm.erdr2pml import getuser_key
+ userkeys = []
+ ar = keystuff.split(':')
+ for keystring in ar:
+ try:
+ name, cc = keystring.split(',')
+ # Generate eReader user key from name and credit card number.
+ keyname = u"{0}_{1}".format(name.strip(),cc.strip()[-4:])
+ keyvalue = getuser_key(name,cc).encode('hex')
+ userkeys.append([keyname,keyvalue])
+ except Exception, e:
+ traceback.print_exc()
+ print e.args[0]
+ pass
+ return userkeys
+
+ def parseKindleString(keystuff):
+ pids = []
+ serials = []
+ ar = keystuff.split(',')
+ for keystring in ar:
+ keystring = str(keystring).strip().replace(" ","")
+            if (len(keystring) == 10 or len(keystring) == 8) and keystring not in pids:
+ pids.append(keystring)
+ elif len(keystring) == 16 and keystring[0] == 'B' and keystring not in serials:
+ serials.append(keystring)
+ return (pids,serials)
+
+ def getConfigFiles(extension, encoding = None):
+ # get any files with extension 'extension' in the config dir
+ userkeys = []
+ files = [f for f in os.listdir(config_dir) if f.endswith(extension)]
+ for filename in files:
+ try:
+ fpath = os.path.join(config_dir, filename)
+ key = os.path.splitext(filename)[0]
+ value = open(fpath, 'rb').read()
+ if encoding is not None:
+ value = value.encode(encoding)
+ userkeys.append([key,value])
+ except:
+ traceback.print_exc()
+ pass
+ return userkeys
+
+ dedrmprefs = DeDRM_Prefs()
+
+ if (not always) and dedrmprefs['configured']:
+ # We've already converted old preferences,
+ # and we're not being forced to do it again, so just return
+ return
+
+
+ print u"{0} v{1}: Importing configuration data from old DeDRM plugins".format(PLUGIN_NAME, PLUGIN_VERSION)
+
+ IGNOBLEPLUGINNAME = "Ignoble Epub DeDRM"
+ EREADERPLUGINNAME = "eReader PDB 2 PML"
+ OLDKINDLEPLUGINNAME = "K4PC, K4Mac, Kindle Mobi and Topaz DeDRM"
+
+ # get prefs from older tools
+ kindleprefs = JSONConfig(os.path.join(u"plugins", u"K4MobiDeDRM"))
+ ignobleprefs = JSONConfig(os.path.join(u"plugins", u"ignoble_epub_dedrm"))
+
+ # Handle the old ignoble plugin's customization string by converting the
+ # old string to stored keys... get that personal data out of plain sight.
+ from calibre.customize.ui import config
+ sc = config['plugin_customization']
+ val = sc.pop(IGNOBLEPLUGINNAME, None)
+ if val is not None:
+ print u"{0} v{1}: Converting old Ignoble plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION)
+ priorkeycount = len(dedrmprefs['bandnkeys'])
+ userkeys = parseIgnobleString(str(val))
+ for keypair in userkeys:
+ name = keypair[0]
+ value = keypair[1]
+ dedrmprefs.addnamedvaluetoprefs('bandnkeys', name, value)
+ addedkeycount = len(dedrmprefs['bandnkeys'])-priorkeycount
+ print u"{0} v{1}: {2:d} Barnes and Noble {3} imported from old Ignoble plugin configuration string".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key" if addedkeycount==1 else u"keys")
+ # Make the json write all the prefs to disk
+ dedrmprefs.writeprefs(False)
+
+ # Handle the old eReader plugin's customization string by converting the
+ # old string to stored keys... get that personal data out of plain sight.
+ val = sc.pop(EREADERPLUGINNAME, None)
+ if val is not None:
+ print u"{0} v{1}: Converting old eReader plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION)
+ priorkeycount = len(dedrmprefs['ereaderkeys'])
+ userkeys = parseeReaderString(str(val))
+ for keypair in userkeys:
+ name = keypair[0]
+ value = keypair[1]
+ dedrmprefs.addnamedvaluetoprefs('ereaderkeys', name, value)
+ addedkeycount = len(dedrmprefs['ereaderkeys'])-priorkeycount
+ print u"{0} v{1}: {2:d} eReader {3} imported from old eReader plugin configuration string".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key" if addedkeycount==1 else u"keys")
+ # Make the json write all the prefs to disk
+ dedrmprefs.writeprefs(False)
+
+ # get old Kindle plugin configuration string
+ val = sc.pop(OLDKINDLEPLUGINNAME, None)
+ if val is not None:
+ print u"{0} v{1}: Converting old Kindle plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION)
+ priorpidcount = len(dedrmprefs['pids'])
+ priorserialcount = len(dedrmprefs['serials'])
+ pids, serials = parseKindleString(val)
+ for pid in pids:
+ dedrmprefs.addvaluetoprefs('pids',pid)
+ for serial in serials:
+ dedrmprefs.addvaluetoprefs('serials',serial)
+ addedpidcount = len(dedrmprefs['pids']) - priorpidcount
+ addedserialcount = len(dedrmprefs['serials']) - priorserialcount
+ print u"{0} v{1}: {2:d} {3} and {4:d} {5} imported from old Kindle plugin configuration string.".format(PLUGIN_NAME, PLUGIN_VERSION, addedpidcount, u"PID" if addedpidcount==1 else u"PIDs", addedserialcount, u"serial number" if addedserialcount==1 else u"serial numbers")
+ # Make the json write all the prefs to disk
+ dedrmprefs.writeprefs(False)
+
+ # copy the customisations back into calibre preferences, as we've now removed the nasty plaintext
+ config['plugin_customization'] = sc
+
+ # get any .b64 files in the config dir
+ priorkeycount = len(dedrmprefs['bandnkeys'])
+ bandnfilekeys = getConfigFiles('.b64')
+ for keypair in bandnfilekeys:
+ name = keypair[0]
+ value = keypair[1]
+ dedrmprefs.addnamedvaluetoprefs('bandnkeys', name, value)
+ addedkeycount = len(dedrmprefs['bandnkeys'])-priorkeycount
+ if addedkeycount > 0:
+ print u"{0} v{1}: {2:d} Barnes and Noble {3} imported from config folder.".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key file" if addedkeycount==1 else u"key files")
+ # Make the json write all the prefs to disk
+ dedrmprefs.writeprefs(False)
+
+ # get any .der files in the config dir
+ priorkeycount = len(dedrmprefs['adeptkeys'])
+ adeptfilekeys = getConfigFiles('.der','hex')
+ for keypair in adeptfilekeys:
+ name = keypair[0]
+ value = keypair[1]
+ dedrmprefs.addnamedvaluetoprefs('adeptkeys', name, value)
+ addedkeycount = len(dedrmprefs['adeptkeys'])-priorkeycount
+ if addedkeycount > 0:
+ print u"{0} v{1}: {2:d} Adobe Adept {3} imported from config folder.".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"keyfile" if addedkeycount==1 else u"keyfiles")
+ # Make the json write all the prefs to disk
+ dedrmprefs.writeprefs(False)
+
+ # get ignoble json prefs
+ if 'keys' in ignobleprefs:
+ priorkeycount = len(dedrmprefs['bandnkeys'])
+ for name in ignobleprefs['keys']:
+ value = ignobleprefs['keys'][name]
+ dedrmprefs.addnamedvaluetoprefs('bandnkeys', name, value)
+ addedkeycount = len(dedrmprefs['bandnkeys']) - priorkeycount
+ # no need to delete old prefs, since they contain no recoverable private data
+ if addedkeycount > 0:
+ print u"{0} v{1}: {2:d} Barnes and Noble {3} imported from Ignoble plugin preferences.".format(PLUGIN_NAME, PLUGIN_VERSION, addedkeycount, u"key" if addedkeycount==1 else u"keys")
+ # Make the json write all the prefs to disk
+ dedrmprefs.writeprefs(False)
+
+ # get kindle json prefs
+ priorpidcount = len(dedrmprefs['pids'])
+ priorserialcount = len(dedrmprefs['serials'])
+ if 'pids' in kindleprefs:
+ pids, serials = parseKindleString(kindleprefs['pids'])
+ for pid in pids:
+ dedrmprefs.addvaluetoprefs('pids',pid)
+ if 'serials' in kindleprefs:
+ pids, serials = parseKindleString(kindleprefs['serials'])
+ for serial in serials:
+ dedrmprefs.addvaluetoprefs('serials',serial)
+ addedpidcount = len(dedrmprefs['pids']) - priorpidcount
+ if addedpidcount > 0:
+ print u"{0} v{1}: {2:d} {3} imported from Kindle plugin preferences".format(PLUGIN_NAME, PLUGIN_VERSION, addedpidcount, u"PID" if addedpidcount==1 else u"PIDs")
+ addedserialcount = len(dedrmprefs['serials']) - priorserialcount
+ if addedserialcount > 0:
+ print u"{0} v{1}: {2:d} {3} imported from Kindle plugin preferences".format(PLUGIN_NAME, PLUGIN_VERSION, addedserialcount, u"serial number" if addedserialcount==1 else u"serial numbers")
+ try:
+ if 'wineprefix' in kindleprefs and kindleprefs['wineprefix'] != "":
+ dedrmprefs.set('adobewineprefix',kindleprefs['wineprefix'])
+ dedrmprefs.set('kindlewineprefix',kindleprefs['wineprefix'])
+            print u"{0} v{1}: WINEPREFIX ‘{2}’ imported from Kindle plugin preferences".format(PLUGIN_NAME, PLUGIN_VERSION, kindleprefs['wineprefix'])
+ except:
+ traceback.print_exc()
+
+
+ # Make the json write all the prefs to disk
+ dedrmprefs.writeprefs()
+ print u"{0} v{1}: Finished setting up configuration data.".format(PLUGIN_NAME, PLUGIN_VERSION)
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/stylexml2css.py b/DeDRM_calibre_plugin/DeDRM_plugin/stylexml2css.py
index c111850..daa108a 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/stylexml2css.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/stylexml2css.py
@@ -178,7 +178,12 @@ class DocParser(object):
if val == "":
val = 0
- if not ((attr == 'hang') and (int(val) == 0)) :
+ if not ((attr == 'hang') and (int(val) == 0)):
+ try:
+ f = float(val)
+ except:
+ print "Warning: unrecognised val, ignoring"
+ val = 0
pv = float(val)/scale
cssargs[attr] = (self.attr_val_map[attr], pv)
keep = True
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/subasyncio.py b/DeDRM_calibre_plugin/DeDRM_plugin/subasyncio.py
new file mode 100644
index 0000000..de084d3
--- /dev/null
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/subasyncio.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+import os, sys
+import signal
+import threading
+import subprocess
+from subprocess import Popen, PIPE, STDOUT
+
+# **heavily** chopped up and modfied version of asyncproc.py
+# to make it actually work on Windows as well as Mac/Linux
+# For the original see:
+# "http://www.lysator.liu.se/~bellman/download/"
+# author is "Thomas Bellman <[email protected]>"
+# available under GPL version 3 or Later
+
+# create an asynchronous subprocess whose output can be collected in
+# a non-blocking manner
+
+# What a mess! Have to use threads just to get non-blocking io
+# in a cross-platform manner
+
+# luckily all thread use is hidden within this class
+
+class Process(object):
+ def __init__(self, *params, **kwparams):
+ if len(params) <= 3:
+ kwparams.setdefault('stdin', subprocess.PIPE)
+ if len(params) <= 4:
+ kwparams.setdefault('stdout', subprocess.PIPE)
+ if len(params) <= 5:
+ kwparams.setdefault('stderr', subprocess.PIPE)
+ self.__pending_input = []
+ self.__collected_outdata = []
+ self.__collected_errdata = []
+ self.__exitstatus = None
+ self.__lock = threading.Lock()
+ self.__inputsem = threading.Semaphore(0)
+ self.__quit = False
+
+ self.__process = subprocess.Popen(*params, **kwparams)
+
+ if self.__process.stdin:
+ self.__stdin_thread = threading.Thread(
+ name="stdin-thread",
+ target=self.__feeder, args=(self.__pending_input,
+ self.__process.stdin))
+ self.__stdin_thread.setDaemon(True)
+ self.__stdin_thread.start()
+
+ if self.__process.stdout:
+ self.__stdout_thread = threading.Thread(
+ name="stdout-thread",
+ target=self.__reader, args=(self.__collected_outdata,
+ self.__process.stdout))
+ self.__stdout_thread.setDaemon(True)
+ self.__stdout_thread.start()
+
+ if self.__process.stderr:
+ self.__stderr_thread = threading.Thread(
+ name="stderr-thread",
+ target=self.__reader, args=(self.__collected_errdata,
+ self.__process.stderr))
+ self.__stderr_thread.setDaemon(True)
+ self.__stderr_thread.start()
+
+ def pid(self):
+ return self.__process.pid
+
+ def kill(self, signal):
+ self.__process.send_signal(signal)
+
+ # check on subprocess (pass in 'nowait') to act like poll
+ def wait(self, flag):
+ if flag.lower() == 'nowait':
+ rc = self.__process.poll()
+ else:
+ rc = self.__process.wait()
+ if rc != None:
+ if self.__process.stdin:
+ self.closeinput()
+ if self.__process.stdout:
+ self.__stdout_thread.join()
+ if self.__process.stderr:
+ self.__stderr_thread.join()
+ return self.__process.returncode
+
+ def terminate(self):
+ if self.__process.stdin:
+ self.closeinput()
+ self.__process.terminate()
+
+ # thread gets data from subprocess stdout
+ def __reader(self, collector, source):
+ while True:
+ data = os.read(source.fileno(), 65536)
+ self.__lock.acquire()
+ collector.append(data)
+ self.__lock.release()
+ if data == "":
+ source.close()
+ break
+ return
+
+ # thread feeds data to subprocess stdin
+ def __feeder(self, pending, drain):
+ while True:
+ self.__inputsem.acquire()
+ self.__lock.acquire()
+ if not pending and self.__quit:
+ drain.close()
+ self.__lock.release()
+ break
+ data = pending.pop(0)
+ self.__lock.release()
+ drain.write(data)
+
+ # non-blocking read of data from subprocess stdout
+ def read(self):
+ self.__lock.acquire()
+ outdata = "".join(self.__collected_outdata)
+ del self.__collected_outdata[:]
+ self.__lock.release()
+ return outdata
+
+ # non-blocking read of data from subprocess stderr
+ def readerr(self):
+ self.__lock.acquire()
+ errdata = "".join(self.__collected_errdata)
+ del self.__collected_errdata[:]
+ self.__lock.release()
+ return errdata
+
+ # non-blocking write to stdin of subprocess
+ def write(self, data):
+ if self.__process.stdin is None:
+ raise ValueError("Writing to process with stdin not a pipe")
+ self.__lock.acquire()
+ self.__pending_input.append(data)
+ self.__inputsem.release()
+ self.__lock.release()
+
+ # close stdinput of subprocess
+ def closeinput(self):
+ self.__lock.acquire()
+ self.__quit = True
+ self.__inputsem.release()
+ self.__lock.release()
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/topazextract.py b/DeDRM_calibre_plugin/DeDRM_plugin/topazextract.py
index de084d3..fb5eb7a 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/topazextract.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/topazextract.py
@@ -1,148 +1,538 @@
#!/usr/bin/env python
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-
-import os, sys
-import signal
-import threading
-import subprocess
-from subprocess import Popen, PIPE, STDOUT
-
-# **heavily** chopped up and modfied version of asyncproc.py
-# to make it actually work on Windows as well as Mac/Linux
-# For the original see:
-# "http://www.lysator.liu.se/~bellman/download/"
-# author is "Thomas Bellman <[email protected]>"
-# available under GPL version 3 or Later
-
-# create an asynchronous subprocess whose output can be collected in
-# a non-blocking manner
-
-# What a mess! Have to use threads just to get non-blocking io
-# in a cross-platform manner
-
-# luckily all thread use is hidden within this class
-
-class Process(object):
- def __init__(self, *params, **kwparams):
- if len(params) <= 3:
- kwparams.setdefault('stdin', subprocess.PIPE)
- if len(params) <= 4:
- kwparams.setdefault('stdout', subprocess.PIPE)
- if len(params) <= 5:
- kwparams.setdefault('stderr', subprocess.PIPE)
- self.__pending_input = []
- self.__collected_outdata = []
- self.__collected_errdata = []
- self.__exitstatus = None
- self.__lock = threading.Lock()
- self.__inputsem = threading.Semaphore(0)
- self.__quit = False
-
- self.__process = subprocess.Popen(*params, **kwparams)
-
- if self.__process.stdin:
- self.__stdin_thread = threading.Thread(
- name="stdin-thread",
- target=self.__feeder, args=(self.__pending_input,
- self.__process.stdin))
- self.__stdin_thread.setDaemon(True)
- self.__stdin_thread.start()
-
- if self.__process.stdout:
- self.__stdout_thread = threading.Thread(
- name="stdout-thread",
- target=self.__reader, args=(self.__collected_outdata,
- self.__process.stdout))
- self.__stdout_thread.setDaemon(True)
- self.__stdout_thread.start()
-
- if self.__process.stderr:
- self.__stderr_thread = threading.Thread(
- name="stderr-thread",
- target=self.__reader, args=(self.__collected_errdata,
- self.__process.stderr))
- self.__stderr_thread.setDaemon(True)
- self.__stderr_thread.start()
-
- def pid(self):
- return self.__process.pid
-
- def kill(self, signal):
- self.__process.send_signal(signal)
-
- # check on subprocess (pass in 'nowait') to act like poll
- def wait(self, flag):
- if flag.lower() == 'nowait':
- rc = self.__process.poll()
+# -*- coding: utf-8 -*-
+
+# topazextract.py
+# Mostly written by some_updates based on code from many others
+
+# Changelog
+# 4.9 - moved unicode_argv call inside main for Windows DeDRM compatibility
+# 5.0 - Fixed potential unicode problem with command line interface
+
+__version__ = '5.0'
+
+import sys
+import os, csv, getopt
+import zlib, zipfile, tempfile, shutil
+import traceback
+from struct import pack
+from struct import unpack
+from alfcrypto import Topaz_Cipher
+
+class SafeUnbuffered:
+ def __init__(self, stream):
+ self.stream = stream
+ self.encoding = stream.encoding
+ if self.encoding == None:
+ self.encoding = "utf-8"
+ def write(self, data):
+ if isinstance(data,unicode):
+ data = data.encode(self.encoding,"replace")
+ self.stream.write(data)
+ self.stream.flush()
+ def __getattr__(self, attr):
+ return getattr(self.stream, attr)
+
+iswindows = sys.platform.startswith('win')
+isosx = sys.platform.startswith('darwin')
+
+def unicode_argv():
+ if iswindows:
+ # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
+ # strings.
+
+ # Versions 2.x of Python don't support Unicode in sys.argv on
+ # Windows, with the underlying Windows API instead replacing multi-byte
+ # characters with '?'.
+
+
+ from ctypes import POINTER, byref, cdll, c_int, windll
+ from ctypes.wintypes import LPCWSTR, LPWSTR
+
+ GetCommandLineW = cdll.kernel32.GetCommandLineW
+ GetCommandLineW.argtypes = []
+ GetCommandLineW.restype = LPCWSTR
+
+ CommandLineToArgvW = windll.shell32.CommandLineToArgvW
+ CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
+ CommandLineToArgvW.restype = POINTER(LPWSTR)
+
+ cmd = GetCommandLineW()
+ argc = c_int(0)
+ argv = CommandLineToArgvW(cmd, byref(argc))
+ if argc.value > 0:
+ # Remove Python executable and commands if present
+ start = argc.value - len(sys.argv)
+ return [argv[i] for i in
+ xrange(start, argc.value)]
+ # if we don't have any arguments at all, just pass back script name
+ # this should never happen
+ return [u"mobidedrm.py"]
+ else:
+ argvencoding = sys.stdin.encoding
+ if argvencoding == None:
+ argvencoding = 'utf-8'
+ return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
+
+#global switch
+debug = False
+
+if 'calibre' in sys.modules:
+ inCalibre = True
+ from calibre_plugins.dedrm import kgenpids
+else:
+ inCalibre = False
+ import kgenpids
+
+
+class DrmException(Exception):
+ pass
+
+
+# recursive zip creation support routine
+def zipUpDir(myzip, tdir, localname):
+ currentdir = tdir
+ if localname != u"":
+ currentdir = os.path.join(currentdir,localname)
+ list = os.listdir(currentdir)
+ for file in list:
+ afilename = file
+ localfilePath = os.path.join(localname, afilename)
+ realfilePath = os.path.join(currentdir,file)
+ if os.path.isfile(realfilePath):
+ myzip.write(realfilePath, localfilePath)
+ elif os.path.isdir(realfilePath):
+ zipUpDir(myzip, tdir, localfilePath)
+
+#
+# Utility routines
+#
+
+# Get a 7 bit encoded number from file
+def bookReadEncodedNumber(fo):
+ flag = False
+ data = ord(fo.read(1))
+ if data == 0xFF:
+ flag = True
+ data = ord(fo.read(1))
+ if data >= 0x80:
+ datax = (data & 0x7F)
+ while data >= 0x80 :
+ data = ord(fo.read(1))
+ datax = (datax <<7) + (data & 0x7F)
+ data = datax
+ if flag:
+ data = -data
+ return data
+
+# Get a length prefixed string from file
+def bookReadString(fo):
+ stringLength = bookReadEncodedNumber(fo)
+ return unpack(str(stringLength)+'s',fo.read(stringLength))[0]
+
+#
+# crypto routines
+#
+
+# Context initialisation for the Topaz Crypto
+def topazCryptoInit(key):
+ return Topaz_Cipher().ctx_init(key)
+
+# ctx1 = 0x0CAFFE19E
+# for keyChar in key:
+# keyByte = ord(keyChar)
+# ctx2 = ctx1
+# ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF )
+# return [ctx1,ctx2]
+
+# decrypt data with the context prepared by topazCryptoInit()
+def topazCryptoDecrypt(data, ctx):
+ return Topaz_Cipher().decrypt(data, ctx)
+# ctx1 = ctx[0]
+# ctx2 = ctx[1]
+# plainText = ""
+# for dataChar in data:
+# dataByte = ord(dataChar)
+# m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF
+# ctx2 = ctx1
+# ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF)
+# plainText += chr(m)
+# return plainText
+
+# Decrypt data with the PID
+def decryptRecord(data,PID):
+ ctx = topazCryptoInit(PID)
+ return topazCryptoDecrypt(data, ctx)
+
+# Try to decrypt a dkey record (contains the bookPID)
+def decryptDkeyRecord(data,PID):
+ record = decryptRecord(data,PID)
+ fields = unpack('3sB8sB8s3s',record)
+ if fields[0] != 'PID' or fields[5] != 'pid' :
+ raise DrmException(u"Didn't find PID magic numbers in record")
+ elif fields[1] != 8 or fields[3] != 8 :
+ raise DrmException(u"Record didn't contain correct length fields")
+ elif fields[2] != PID :
+ raise DrmException(u"Record didn't contain PID")
+ return fields[4]
+
+# Decrypt all dkey records (contain the book PID)
+def decryptDkeyRecords(data,PID):
+ nbKeyRecords = ord(data[0])
+ records = []
+ data = data[1:]
+ for i in range (0,nbKeyRecords):
+ length = ord(data[0])
+ try:
+ key = decryptDkeyRecord(data[1:length+1],PID)
+ records.append(key)
+ except DrmException:
+ pass
+ data = data[1+length:]
+ if len(records) == 0:
+ raise DrmException(u"BookKey Not Found")
+ return records
+
+
+class TopazBook:
+ def __init__(self, filename):
+ self.fo = file(filename, 'rb')
+ self.outdir = tempfile.mkdtemp()
+ # self.outdir = 'rawdat'
+ self.bookPayloadOffset = 0
+ self.bookHeaderRecords = {}
+ self.bookMetadata = {}
+ self.bookKey = None
+ magic = unpack('4s',self.fo.read(4))[0]
+ if magic != 'TPZ0':
+ raise DrmException(u"Parse Error : Invalid Header, not a Topaz file")
+ self.parseTopazHeaders()
+ self.parseMetadata()
+
+ def parseTopazHeaders(self):
+ def bookReadHeaderRecordData():
+ # Read and return the data of one header record at the current book file position
+ # [[offset,decompressedLength,compressedLength],...]
+ nbValues = bookReadEncodedNumber(self.fo)
+ if debug: print "%d records in header " % nbValues,
+ values = []
+ for i in range (0,nbValues):
+ values.append([bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo)])
+ return values
+ def parseTopazHeaderRecord():
+ # Read and parse one header record at the current book file position and return the associated data
+ # [[offset,decompressedLength,compressedLength],...]
+ if ord(self.fo.read(1)) != 0x63:
+ raise DrmException(u"Parse Error : Invalid Header")
+ tag = bookReadString(self.fo)
+ record = bookReadHeaderRecordData()
+ return [tag,record]
+ nbRecords = bookReadEncodedNumber(self.fo)
+ if debug: print "Headers: %d" % nbRecords
+ for i in range (0,nbRecords):
+ result = parseTopazHeaderRecord()
+ if debug: print result[0], ": ", result[1]
+ self.bookHeaderRecords[result[0]] = result[1]
+ if ord(self.fo.read(1)) != 0x64 :
+ raise DrmException(u"Parse Error : Invalid Header")
+ self.bookPayloadOffset = self.fo.tell()
+
+ def parseMetadata(self):
+ # Parse the metadata record from the book payload and return a list of [key,values]
+ self.fo.seek(self.bookPayloadOffset + self.bookHeaderRecords['metadata'][0][0])
+ tag = bookReadString(self.fo)
+ if tag != 'metadata' :
+ raise DrmException(u"Parse Error : Record Names Don't Match")
+ flags = ord(self.fo.read(1))
+ nbRecords = ord(self.fo.read(1))
+ if debug: print "Metadata Records: %d" % nbRecords
+ for i in range (0,nbRecords) :
+ keyval = bookReadString(self.fo)
+ content = bookReadString(self.fo)
+ if debug: print keyval
+ if debug: print content
+ self.bookMetadata[keyval] = content
+ return self.bookMetadata
+
+ def getPIDMetaInfo(self):
+ keysRecord = self.bookMetadata.get('keys','')
+ keysRecordRecord = ''
+ if keysRecord != '':
+ keylst = keysRecord.split(',')
+ for keyval in keylst:
+ keysRecordRecord += self.bookMetadata.get(keyval,'')
+ return keysRecord, keysRecordRecord
+
+ def getBookTitle(self):
+ title = ''
+ if 'Title' in self.bookMetadata:
+ title = self.bookMetadata['Title']
+ return title.decode('utf-8')
+
+ def setBookKey(self, key):
+ self.bookKey = key
+
+ def getBookPayloadRecord(self, name, index):
+ # Get a record in the book payload, given its name and index.
+ # decrypted and decompressed if necessary
+ encrypted = False
+ compressed = False
+ try:
+ recordOffset = self.bookHeaderRecords[name][index][0]
+ except:
+ raise DrmException("Parse Error : Invalid Record, record not found")
+
+ self.fo.seek(self.bookPayloadOffset + recordOffset)
+
+ tag = bookReadString(self.fo)
+ if tag != name :
+ raise DrmException("Parse Error : Invalid Record, record name doesn't match")
+
+ recordIndex = bookReadEncodedNumber(self.fo)
+ if recordIndex < 0 :
+ encrypted = True
+ recordIndex = -recordIndex -1
+
+ if recordIndex != index :
+ raise DrmException("Parse Error : Invalid Record, index doesn't match")
+
+ if (self.bookHeaderRecords[name][index][2] > 0):
+ compressed = True
+ record = self.fo.read(self.bookHeaderRecords[name][index][2])
else:
- rc = self.__process.wait()
- if rc != None:
- if self.__process.stdin:
- self.closeinput()
- if self.__process.stdout:
- self.__stdout_thread.join()
- if self.__process.stderr:
- self.__stderr_thread.join()
- return self.__process.returncode
-
- def terminate(self):
- if self.__process.stdin:
- self.closeinput()
- self.__process.terminate()
-
- # thread gets data from subprocess stdout
- def __reader(self, collector, source):
- while True:
- data = os.read(source.fileno(), 65536)
- self.__lock.acquire()
- collector.append(data)
- self.__lock.release()
- if data == "":
- source.close()
- break
- return
-
- # thread feeds data to subprocess stdin
- def __feeder(self, pending, drain):
- while True:
- self.__inputsem.acquire()
- self.__lock.acquire()
- if not pending and self.__quit:
- drain.close()
- self.__lock.release()
+ record = self.fo.read(self.bookHeaderRecords[name][index][1])
+
+ if encrypted:
+ if self.bookKey:
+ ctx = topazCryptoInit(self.bookKey)
+ record = topazCryptoDecrypt(record,ctx)
+ else :
+ raise DrmException("Error: Attempt to decrypt without bookKey")
+
+ if compressed:
+ record = zlib.decompress(record)
+
+ return record
+
+ def processBook(self, pidlst):
+ raw = 0
+ fixedimage=True
+ try:
+ keydata = self.getBookPayloadRecord('dkey', 0)
+ except DrmException, e:
+ print u"no dkey record found, book may not be encrypted"
+            print u"attempting to extract files without a book key"
+ self.createBookDirectory()
+ self.extractFiles()
+ print u"Successfully Extracted Topaz contents"
+ if inCalibre:
+ from calibre_plugins.dedrm import genbook
+ else:
+ import genbook
+
+ rv = genbook.generateBook(self.outdir, raw, fixedimage)
+ if rv == 0:
+ print u"Book Successfully generated."
+ return rv
+
+ # try each pid to decode the file
+ bookKey = None
+ for pid in pidlst:
+ # use 8 digit pids here
+ pid = pid[0:8]
+ print u"Trying: {0}".format(pid)
+ bookKeys = []
+ data = keydata
+ try:
+ bookKeys+=decryptDkeyRecords(data,pid)
+ except DrmException, e:
+ pass
+ else:
+ bookKey = bookKeys[0]
+ print u"Book Key Found! ({0})".format(bookKey.encode('hex'))
break
- data = pending.pop(0)
- self.__lock.release()
- drain.write(data)
-
- # non-blocking read of data from subprocess stdout
- def read(self):
- self.__lock.acquire()
- outdata = "".join(self.__collected_outdata)
- del self.__collected_outdata[:]
- self.__lock.release()
- return outdata
-
- # non-blocking read of data from subprocess stderr
- def readerr(self):
- self.__lock.acquire()
- errdata = "".join(self.__collected_errdata)
- del self.__collected_errdata[:]
- self.__lock.release()
- return errdata
-
- # non-blocking write to stdin of subprocess
- def write(self, data):
- if self.__process.stdin is None:
- raise ValueError("Writing to process with stdin not a pipe")
- self.__lock.acquire()
- self.__pending_input.append(data)
- self.__inputsem.release()
- self.__lock.release()
-
- # close stdinput of subprocess
- def closeinput(self):
- self.__lock.acquire()
- self.__quit = True
- self.__inputsem.release()
- self.__lock.release()
+
+ if not bookKey:
+ raise DrmException(u"No key found in {0:d} keys tried. Read the FAQs at Alf's blog: http://apprenticealf.wordpress.com/".format(len(pidlst)))
+
+ self.setBookKey(bookKey)
+ self.createBookDirectory()
+ self.extractFiles()
+ print u"Successfully Extracted Topaz contents"
+ if inCalibre:
+ from calibre_plugins.dedrm import genbook
+ else:
+ import genbook
+
+ rv = genbook.generateBook(self.outdir, raw, fixedimage)
+ if rv == 0:
+ print u"Book Successfully generated"
+ return rv
+
+ def createBookDirectory(self):
+ outdir = self.outdir
+ # create output directory structure
+ if not os.path.exists(outdir):
+ os.makedirs(outdir)
+ destdir = os.path.join(outdir,u"img")
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+ destdir = os.path.join(outdir,u"color_img")
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+ destdir = os.path.join(outdir,u"page")
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+ destdir = os.path.join(outdir,u"glyphs")
+ if not os.path.exists(destdir):
+ os.makedirs(destdir)
+
+ def extractFiles(self):
+ outdir = self.outdir
+ for headerRecord in self.bookHeaderRecords:
+ name = headerRecord
+ if name != 'dkey':
+ ext = u".dat"
+ if name == 'img': ext = u".jpg"
+ if name == 'color' : ext = u".jpg"
+ print u"Processing Section: {0}\n. . .".format(name),
+ for index in range (0,len(self.bookHeaderRecords[name])) :
+ fname = u"{0}{1:04d}{2}".format(name,index,ext)
+ destdir = outdir
+ if name == 'img':
+ destdir = os.path.join(outdir,u"img")
+ if name == 'color':
+ destdir = os.path.join(outdir,u"color_img")
+ if name == 'page':
+ destdir = os.path.join(outdir,u"page")
+ if name == 'glyphs':
+ destdir = os.path.join(outdir,u"glyphs")
+ outputFile = os.path.join(destdir,fname)
+ print u".",
+ record = self.getBookPayloadRecord(name,index)
+ if record != '':
+ file(outputFile, 'wb').write(record)
+ print u" "
+
+ def getFile(self, zipname):
+ htmlzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ htmlzip.write(os.path.join(self.outdir,u"book.html"),u"book.html")
+ htmlzip.write(os.path.join(self.outdir,u"book.opf"),u"book.opf")
+ if os.path.isfile(os.path.join(self.outdir,u"cover.jpg")):
+ htmlzip.write(os.path.join(self.outdir,u"cover.jpg"),u"cover.jpg")
+ htmlzip.write(os.path.join(self.outdir,u"style.css"),u"style.css")
+ zipUpDir(htmlzip, self.outdir, u"img")
+ htmlzip.close()
+
+ def getBookType(self):
+ return u"Topaz"
+
+ def getBookExtension(self):
+ return u".htmlz"
+
+ def getSVGZip(self, zipname):
+ svgzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
+ svgzip.write(os.path.join(self.outdir,u"index_svg.xhtml"),u"index_svg.xhtml")
+ zipUpDir(svgzip, self.outdir, u"svg")
+ zipUpDir(svgzip, self.outdir, u"img")
+ svgzip.close()
+
+ def cleanup(self):
+ if os.path.isdir(self.outdir):
+ shutil.rmtree(self.outdir, True)
+
+def usage(progname):
+ print u"Removes DRM protection from Topaz ebooks and extracts the contents"
+ print u"Usage:"
+ print u" {0} [-k <kindle.k4i>] [-p <comma separated PIDs>] [-s <comma separated Kindle serial numbers>] <infile> <outdir>".format(progname)
+
+# Main
+def cli_main():
+ argv=unicode_argv()
+ progname = os.path.basename(argv[0])
+ print u"TopazExtract v{0}.".format(__version__)
+
+ try:
+ opts, args = getopt.getopt(argv[1:], "k:p:s:x")
+ except getopt.GetoptError, err:
+ print u"Error in options or arguments: {0}".format(err.args[0])
+ usage(progname)
+ return 1
+ if len(args)<2:
+ usage(progname)
+ return 1
+
+ infile = args[0]
+ outdir = args[1]
+ if not os.path.isfile(infile):
+ print u"Input File {0} Does Not Exist.".format(infile)
+ return 1
+
+ if not os.path.exists(outdir):
+ print u"Output Directory {0} Does Not Exist.".format(outdir)
+ return 1
+
+ kDatabaseFiles = []
+ serials = []
+ pids = []
+
+ for o, a in opts:
+ if o == '-k':
+ if a == None :
+ raise DrmException("Invalid parameter for -k")
+ kDatabaseFiles.append(a)
+ if o == '-p':
+ if a == None :
+ raise DrmException("Invalid parameter for -p")
+ pids = a.split(',')
+ if o == '-s':
+ if a == None :
+ raise DrmException("Invalid parameter for -s")
+ serials = [serial.replace(" ","") for serial in a.split(',')]
+
+ bookname = os.path.splitext(os.path.basename(infile))[0]
+
+ tb = TopazBook(infile)
+ title = tb.getBookTitle()
+ print u"Processing Book: {0}".format(title)
+ md1, md2 = tb.getPIDMetaInfo()
+ pids.extend(kgenpids.getPidList(md1, md2, serials, kDatabaseFiles))
+
+ try:
+ print u"Decrypting Book"
+ tb.processBook(pids)
+
+ print u" Creating HTML ZIP Archive"
+ zipname = os.path.join(outdir, bookname + u"_nodrm.htmlz")
+ tb.getFile(zipname)
+
+ print u" Creating SVG ZIP Archive"
+ zipname = os.path.join(outdir, bookname + u"_SVG.zip")
+ tb.getSVGZip(zipname)
+
+ # removing internal temporary directory of pieces
+ tb.cleanup()
+
+ except DrmException, e:
+ print u"Decryption failed\n{0}".format(traceback.format_exc())
+
+ try:
+ tb.cleanup()
+ except:
+ pass
+ return 1
+
+ except Exception, e:
+        print u"Decryption failed\n{0}".format(traceback.format_exc())
+ try:
+ tb.cleanup()
+ except:
+ pass
+ return 1
+
+ return 0
+
+
+if __name__ == '__main__':
+ sys.stdout=SafeUnbuffered(sys.stdout)
+ sys.stderr=SafeUnbuffered(sys.stderr)
+ sys.exit(cli_main())
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/utilities.py b/DeDRM_calibre_plugin/DeDRM_plugin/utilities.py
index 97f6583..c730607 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/utilities.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/utilities.py
@@ -1,538 +1,39 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-# topazextract.py
-# Mostly written by some_updates based on code from many others
-
-# Changelog
-# 4.9 - moved unicode_argv call inside main for Windows DeDRM compatibility
-# 5.0 - Fixed potential unicode problem with command line interface
-
-__version__ = '5.0'
-
-import sys
-import os, csv, getopt
-import zlib, zipfile, tempfile, shutil
-import traceback
-from struct import pack
-from struct import unpack
-from alfcrypto import Topaz_Cipher
-
-class SafeUnbuffered:
- def __init__(self, stream):
- self.stream = stream
- self.encoding = stream.encoding
- if self.encoding == None:
- self.encoding = "utf-8"
- def write(self, data):
- if isinstance(data,unicode):
- data = data.encode(self.encoding,"replace")
- self.stream.write(data)
- self.stream.flush()
- def __getattr__(self, attr):
- return getattr(self.stream, attr)
-
-iswindows = sys.platform.startswith('win')
-isosx = sys.platform.startswith('darwin')
-
-def unicode_argv():
- if iswindows:
- # Uses shell32.GetCommandLineArgvW to get sys.argv as a list of Unicode
- # strings.
-
- # Versions 2.x of Python don't support Unicode in sys.argv on
- # Windows, with the underlying Windows API instead replacing multi-byte
- # characters with '?'.
-
-
- from ctypes import POINTER, byref, cdll, c_int, windll
- from ctypes.wintypes import LPCWSTR, LPWSTR
-
- GetCommandLineW = cdll.kernel32.GetCommandLineW
- GetCommandLineW.argtypes = []
- GetCommandLineW.restype = LPCWSTR
-
- CommandLineToArgvW = windll.shell32.CommandLineToArgvW
- CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
- CommandLineToArgvW.restype = POINTER(LPWSTR)
-
- cmd = GetCommandLineW()
- argc = c_int(0)
- argv = CommandLineToArgvW(cmd, byref(argc))
- if argc.value > 0:
- # Remove Python executable and commands if present
- start = argc.value - len(sys.argv)
- return [argv[i] for i in
- xrange(start, argc.value)]
- # if we don't have any arguments at all, just pass back script name
- # this should never happen
- return [u"mobidedrm.py"]
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+
+DETAILED_MESSAGE = \
+'You have personal information stored in this plugin\'s customization '+ \
+'string from a previous version of this plugin.\n\n'+ \
+'This new version of the plugin can convert that info '+ \
+'into key data that the new plugin can then use (which doesn\'t '+ \
+'require personal information to be stored/displayed in an insecure '+ \
+'manner like the old plugin did).\n\nIf you choose NOT to migrate this data at this time '+ \
+'you will be prompted to save that personal data to a file elsewhere; and you\'ll have '+ \
+'to manually re-configure this plugin with your information.\n\nEither way... ' + \
+'this new version of the plugin will not be responsible for storing that personal '+ \
+'info in plain sight any longer.'
+
+def uStrCmp (s1, s2, caseless=False):
+ import unicodedata as ud
+ str1 = s1 if isinstance(s1, unicode) else unicode(s1)
+ str2 = s2 if isinstance(s2, unicode) else unicode(s2)
+ if caseless:
+ return ud.normalize('NFC', str1.lower()) == ud.normalize('NFC', str2.lower())
else:
- argvencoding = sys.stdin.encoding
- if argvencoding == None:
- argvencoding = 'utf-8'
- return [arg if (type(arg) == unicode) else unicode(arg,argvencoding) for arg in sys.argv]
-
-#global switch
-debug = False
-
-if 'calibre' in sys.modules:
- inCalibre = True
- from calibre_plugins.dedrm import kgenpids
-else:
- inCalibre = False
- import kgenpids
-
-
-class DrmException(Exception):
- pass
-
-
-# recursive zip creation support routine
-def zipUpDir(myzip, tdir, localname):
- currentdir = tdir
- if localname != u"":
- currentdir = os.path.join(currentdir,localname)
- list = os.listdir(currentdir)
- for file in list:
- afilename = file
- localfilePath = os.path.join(localname, afilename)
- realfilePath = os.path.join(currentdir,file)
- if os.path.isfile(realfilePath):
- myzip.write(realfilePath, localfilePath)
- elif os.path.isdir(realfilePath):
- zipUpDir(myzip, tdir, localfilePath)
-
-#
-# Utility routines
-#
-
-# Get a 7 bit encoded number from file
-def bookReadEncodedNumber(fo):
- flag = False
- data = ord(fo.read(1))
- if data == 0xFF:
- flag = True
- data = ord(fo.read(1))
- if data >= 0x80:
- datax = (data & 0x7F)
- while data >= 0x80 :
- data = ord(fo.read(1))
- datax = (datax <<7) + (data & 0x7F)
- data = datax
- if flag:
- data = -data
- return data
-
-# Get a length prefixed string from file
-def bookReadString(fo):
- stringLength = bookReadEncodedNumber(fo)
- return unpack(str(stringLength)+'s',fo.read(stringLength))[0]
+ return ud.normalize('NFC', str1) == ud.normalize('NFC', str2)
-#
-# crypto routines
-#
-
-# Context initialisation for the Topaz Crypto
-def topazCryptoInit(key):
- return Topaz_Cipher().ctx_init(key)
-
-# ctx1 = 0x0CAFFE19E
-# for keyChar in key:
-# keyByte = ord(keyChar)
-# ctx2 = ctx1
-# ctx1 = ((((ctx1 >>2) * (ctx1 >>7))&0xFFFFFFFF) ^ (keyByte * keyByte * 0x0F902007)& 0xFFFFFFFF )
-# return [ctx1,ctx2]
-
-# decrypt data with the context prepared by topazCryptoInit()
-def topazCryptoDecrypt(data, ctx):
- return Topaz_Cipher().decrypt(data, ctx)
-# ctx1 = ctx[0]
-# ctx2 = ctx[1]
-# plainText = ""
-# for dataChar in data:
-# dataByte = ord(dataChar)
-# m = (dataByte ^ ((ctx1 >> 3) &0xFF) ^ ((ctx2<<3) & 0xFF)) &0xFF
-# ctx2 = ctx1
-# ctx1 = (((ctx1 >> 2) * (ctx1 >> 7)) &0xFFFFFFFF) ^((m * m * 0x0F902007) &0xFFFFFFFF)
-# plainText += chr(m)
-# return plainText
-
-# Decrypt data with the PID
-def decryptRecord(data,PID):
- ctx = topazCryptoInit(PID)
- return topazCryptoDecrypt(data, ctx)
-
-# Try to decrypt a dkey record (contains the bookPID)
-def decryptDkeyRecord(data,PID):
- record = decryptRecord(data,PID)
- fields = unpack('3sB8sB8s3s',record)
- if fields[0] != 'PID' or fields[5] != 'pid' :
- raise DrmException(u"Didn't find PID magic numbers in record")
- elif fields[1] != 8 or fields[3] != 8 :
- raise DrmException(u"Record didn't contain correct length fields")
- elif fields[2] != PID :
- raise DrmException(u"Record didn't contain PID")
- return fields[4]
-
-# Decrypt all dkey records (contain the book PID)
-def decryptDkeyRecords(data,PID):
- nbKeyRecords = ord(data[0])
- records = []
- data = data[1:]
- for i in range (0,nbKeyRecords):
- length = ord(data[0])
+def parseCustString(keystuff):
+ userkeys = []
+ ar = keystuff.split(':')
+ for i in ar:
try:
- key = decryptDkeyRecord(data[1:length+1],PID)
- records.append(key)
- except DrmException:
- pass
- data = data[1+length:]
- if len(records) == 0:
- raise DrmException(u"BookKey Not Found")
- return records
-
-
-class TopazBook:
- def __init__(self, filename):
- self.fo = file(filename, 'rb')
- self.outdir = tempfile.mkdtemp()
- # self.outdir = 'rawdat'
- self.bookPayloadOffset = 0
- self.bookHeaderRecords = {}
- self.bookMetadata = {}
- self.bookKey = None
- magic = unpack('4s',self.fo.read(4))[0]
- if magic != 'TPZ0':
- raise DrmException(u"Parse Error : Invalid Header, not a Topaz file")
- self.parseTopazHeaders()
- self.parseMetadata()
-
- def parseTopazHeaders(self):
- def bookReadHeaderRecordData():
- # Read and return the data of one header record at the current book file position
- # [[offset,decompressedLength,compressedLength],...]
- nbValues = bookReadEncodedNumber(self.fo)
- if debug: print "%d records in header " % nbValues,
- values = []
- for i in range (0,nbValues):
- values.append([bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo),bookReadEncodedNumber(self.fo)])
- return values
- def parseTopazHeaderRecord():
- # Read and parse one header record at the current book file position and return the associated data
- # [[offset,decompressedLength,compressedLength],...]
- if ord(self.fo.read(1)) != 0x63:
- raise DrmException(u"Parse Error : Invalid Header")
- tag = bookReadString(self.fo)
- record = bookReadHeaderRecordData()
- return [tag,record]
- nbRecords = bookReadEncodedNumber(self.fo)
- if debug: print "Headers: %d" % nbRecords
- for i in range (0,nbRecords):
- result = parseTopazHeaderRecord()
- if debug: print result[0], ": ", result[1]
- self.bookHeaderRecords[result[0]] = result[1]
- if ord(self.fo.read(1)) != 0x64 :
- raise DrmException(u"Parse Error : Invalid Header")
- self.bookPayloadOffset = self.fo.tell()
-
- def parseMetadata(self):
- # Parse the metadata record from the book payload and return a list of [key,values]
- self.fo.seek(self.bookPayloadOffset + self.bookHeaderRecords['metadata'][0][0])
- tag = bookReadString(self.fo)
- if tag != 'metadata' :
- raise DrmException(u"Parse Error : Record Names Don't Match")
- flags = ord(self.fo.read(1))
- nbRecords = ord(self.fo.read(1))
- if debug: print "Metadata Records: %d" % nbRecords
- for i in range (0,nbRecords) :
- keyval = bookReadString(self.fo)
- content = bookReadString(self.fo)
- if debug: print keyval
- if debug: print content
- self.bookMetadata[keyval] = content
- return self.bookMetadata
-
- def getPIDMetaInfo(self):
- keysRecord = self.bookMetadata.get('keys','')
- keysRecordRecord = ''
- if keysRecord != '':
- keylst = keysRecord.split(',')
- for keyval in keylst:
- keysRecordRecord += self.bookMetadata.get(keyval,'')
- return keysRecord, keysRecordRecord
-
- def getBookTitle(self):
- title = ''
- if 'Title' in self.bookMetadata:
- title = self.bookMetadata['Title']
- return title.decode('utf-8')
-
- def setBookKey(self, key):
- self.bookKey = key
-
- def getBookPayloadRecord(self, name, index):
- # Get a record in the book payload, given its name and index.
- # decrypted and decompressed if necessary
- encrypted = False
- compressed = False
- try:
- recordOffset = self.bookHeaderRecords[name][index][0]
- except:
- raise DrmException("Parse Error : Invalid Record, record not found")
-
- self.fo.seek(self.bookPayloadOffset + recordOffset)
-
- tag = bookReadString(self.fo)
- if tag != name :
- raise DrmException("Parse Error : Invalid Record, record name doesn't match")
-
- recordIndex = bookReadEncodedNumber(self.fo)
- if recordIndex < 0 :
- encrypted = True
- recordIndex = -recordIndex -1
-
- if recordIndex != index :
- raise DrmException("Parse Error : Invalid Record, index doesn't match")
-
- if (self.bookHeaderRecords[name][index][2] > 0):
- compressed = True
- record = self.fo.read(self.bookHeaderRecords[name][index][2])
- else:
- record = self.fo.read(self.bookHeaderRecords[name][index][1])
-
- if encrypted:
- if self.bookKey:
- ctx = topazCryptoInit(self.bookKey)
- record = topazCryptoDecrypt(record,ctx)
- else :
- raise DrmException("Error: Attempt to decrypt without bookKey")
-
- if compressed:
- record = zlib.decompress(record)
-
- return record
-
- def processBook(self, pidlst):
- raw = 0
- fixedimage=True
- try:
- keydata = self.getBookPayloadRecord('dkey', 0)
- except DrmException, e:
- print u"no dkey record found, book may not be encrypted"
- print u"attempting to extrct files without a book key"
- self.createBookDirectory()
- self.extractFiles()
- print u"Successfully Extracted Topaz contents"
- if inCalibre:
- from calibre_plugins.dedrm import genbook
- else:
- import genbook
-
- rv = genbook.generateBook(self.outdir, raw, fixedimage)
- if rv == 0:
- print u"Book Successfully generated."
- return rv
-
- # try each pid to decode the file
- bookKey = None
- for pid in pidlst:
- # use 8 digit pids here
- pid = pid[0:8]
- print u"Trying: {0}".format(pid)
- bookKeys = []
- data = keydata
- try:
- bookKeys+=decryptDkeyRecords(data,pid)
- except DrmException, e:
- pass
- else:
- bookKey = bookKeys[0]
- print u"Book Key Found! ({0})".format(bookKey.encode('hex'))
- break
-
- if not bookKey:
- raise DrmException(u"No key found in {0:d} keys tried. Read the FAQs at Alf's blog: http://apprenticealf.wordpress.com/".format(len(pidlst)))
-
- self.setBookKey(bookKey)
- self.createBookDirectory()
- self.extractFiles()
- print u"Successfully Extracted Topaz contents"
- if inCalibre:
- from calibre_plugins.dedrm import genbook
- else:
- import genbook
-
- rv = genbook.generateBook(self.outdir, raw, fixedimage)
- if rv == 0:
- print u"Book Successfully generated"
- return rv
-
- def createBookDirectory(self):
- outdir = self.outdir
- # create output directory structure
- if not os.path.exists(outdir):
- os.makedirs(outdir)
- destdir = os.path.join(outdir,u"img")
- if not os.path.exists(destdir):
- os.makedirs(destdir)
- destdir = os.path.join(outdir,u"color_img")
- if not os.path.exists(destdir):
- os.makedirs(destdir)
- destdir = os.path.join(outdir,u"page")
- if not os.path.exists(destdir):
- os.makedirs(destdir)
- destdir = os.path.join(outdir,u"glyphs")
- if not os.path.exists(destdir):
- os.makedirs(destdir)
-
- def extractFiles(self):
- outdir = self.outdir
- for headerRecord in self.bookHeaderRecords:
- name = headerRecord
- if name != 'dkey':
- ext = u".dat"
- if name == 'img': ext = u".jpg"
- if name == 'color' : ext = u".jpg"
- print u"Processing Section: {0}\n. . .".format(name),
- for index in range (0,len(self.bookHeaderRecords[name])) :
- fname = u"{0}{1:04d}{2}".format(name,index,ext)
- destdir = outdir
- if name == 'img':
- destdir = os.path.join(outdir,u"img")
- if name == 'color':
- destdir = os.path.join(outdir,u"color_img")
- if name == 'page':
- destdir = os.path.join(outdir,u"page")
- if name == 'glyphs':
- destdir = os.path.join(outdir,u"glyphs")
- outputFile = os.path.join(destdir,fname)
- print u".",
- record = self.getBookPayloadRecord(name,index)
- if record != '':
- file(outputFile, 'wb').write(record)
- print u" "
-
- def getFile(self, zipname):
- htmlzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
- htmlzip.write(os.path.join(self.outdir,u"book.html"),u"book.html")
- htmlzip.write(os.path.join(self.outdir,u"book.opf"),u"book.opf")
- if os.path.isfile(os.path.join(self.outdir,u"cover.jpg")):
- htmlzip.write(os.path.join(self.outdir,u"cover.jpg"),u"cover.jpg")
- htmlzip.write(os.path.join(self.outdir,u"style.css"),u"style.css")
- zipUpDir(htmlzip, self.outdir, u"img")
- htmlzip.close()
-
- def getBookType(self):
- return u"Topaz"
-
- def getBookExtension(self):
- return u".htmlz"
-
- def getSVGZip(self, zipname):
- svgzip = zipfile.ZipFile(zipname,'w',zipfile.ZIP_DEFLATED, False)
- svgzip.write(os.path.join(self.outdir,u"index_svg.xhtml"),u"index_svg.xhtml")
- zipUpDir(svgzip, self.outdir, u"svg")
- zipUpDir(svgzip, self.outdir, u"img")
- svgzip.close()
-
- def cleanup(self):
- if os.path.isdir(self.outdir):
- shutil.rmtree(self.outdir, True)
-
-def usage(progname):
- print u"Removes DRM protection from Topaz ebooks and extracts the contents"
- print u"Usage:"
- print u" {0} [-k <kindle.k4i>] [-p <comma separated PIDs>] [-s <comma separated Kindle serial numbers>] <infile> <outdir>".format(progname)
-
-# Main
-def cli_main():
- argv=unicode_argv()
- progname = os.path.basename(argv[0])
- print u"TopazExtract v{0}.".format(__version__)
-
- try:
- opts, args = getopt.getopt(argv[1:], "k:p:s:x")
- except getopt.GetoptError, err:
- print u"Error in options or arguments: {0}".format(err.args[0])
- usage(progname)
- return 1
- if len(args)<2:
- usage(progname)
- return 1
-
- infile = args[0]
- outdir = args[1]
- if not os.path.isfile(infile):
- print u"Input File {0} Does Not Exist.".format(infile)
- return 1
-
- if not os.path.exists(outdir):
- print u"Output Directory {0} Does Not Exist.".format(outdir)
- return 1
-
- kDatabaseFiles = []
- serials = []
- pids = []
-
- for o, a in opts:
- if o == '-k':
- if a == None :
- raise DrmException("Invalid parameter for -k")
- kDatabaseFiles.append(a)
- if o == '-p':
- if a == None :
- raise DrmException("Invalid parameter for -p")
- pids = a.split(',')
- if o == '-s':
- if a == None :
- raise DrmException("Invalid parameter for -s")
- serials = [serial.replace(" ","") for serial in a.split(',')]
-
- bookname = os.path.splitext(os.path.basename(infile))[0]
-
- tb = TopazBook(infile)
- title = tb.getBookTitle()
- print u"Processing Book: {0}".format(title)
- md1, md2 = tb.getPIDMetaInfo()
- pids.extend(kgenpids.getPidList(md1, md2, serials, kDatabaseFiles))
-
- try:
- print u"Decrypting Book"
- tb.processBook(pids)
-
- print u" Creating HTML ZIP Archive"
- zipname = os.path.join(outdir, bookname + u"_nodrm.htmlz")
- tb.getFile(zipname)
-
- print u" Creating SVG ZIP Archive"
- zipname = os.path.join(outdir, bookname + u"_SVG.zip")
- tb.getSVGZip(zipname)
-
- # removing internal temporary directory of pieces
- tb.cleanup()
-
- except DrmException, e:
- print u"Decryption failed\n{0}".format(traceback.format_exc())
-
- try:
- tb.cleanup()
- except:
- pass
- return 1
-
- except Exception, e:
- print u"Decryption failed\m{0}".format(traceback.format_exc())
- try:
- tb.cleanup()
+ name, ccn = i.split(',')
+ # Generate Barnes & Noble EPUB user key from name and credit card number.
+ userkeys.append(generate_key(name, ccn))
except:
pass
- return 1
-
- return 0
-
-
-if __name__ == '__main__':
- sys.stdout=SafeUnbuffered(sys.stdout)
- sys.stderr=SafeUnbuffered(sys.stderr)
- sys.exit(cli_main())
+ return userkeys
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/wineutils.py b/DeDRM_calibre_plugin/DeDRM_plugin/wineutils.py
new file mode 100644
index 0000000..b54db80
--- /dev/null
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/wineutils.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from __future__ import with_statement
+
+__license__ = 'GPL v3'
+
+# Standard Python modules.
+import os, sys, re, hashlib, traceback
+from calibre_plugins.dedrm.__init__ import PLUGIN_NAME, PLUGIN_VERSION
+
+def WineGetKeys(scriptpath, extension, wineprefix=""):
+ import subprocess
+ from subprocess import Popen, PIPE, STDOUT
+
+ import subasyncio
+ from subasyncio import Process
+
+ if extension == u".k4i":
+ import json
+
+ basepath, script = os.path.split(scriptpath)
+ print u"{0} v{1}: Running {2} under Wine".format(PLUGIN_NAME, PLUGIN_VERSION, script)
+
+ outdirpath = os.path.join(basepath, u"winekeysdir")
+ if not os.path.exists(outdirpath):
+ os.makedirs(outdirpath)
+
+ if wineprefix != "" and os.path.exists(wineprefix):
+ cmdline = u"WINEPREFIX=\"{2}\" wine python.exe \"{0}\" \"{1}\"".format(scriptpath,outdirpath,wineprefix)
+ else:
+ cmdline = u"wine python.exe \"{0}\" \"{1}\"".format(scriptpath,outdirpath)
+ print u"{0} v{1}: Command line: “{2}”".format(PLUGIN_NAME, PLUGIN_VERSION, cmdline)
+
+ try:
+ cmdline = cmdline.encode(sys.getfilesystemencoding())
+ p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=sys.stdout, stderr=STDOUT, close_fds=False)
+ result = p2.wait("wait")
+ except Exception, e:
+ print u"{0} v{1}: Wine subprocess call error: {2}".format(PLUGIN_NAME, PLUGIN_VERSION, e.args[0])
+ return []
+
+ winekeys = []
+ # get any files with extension in the output dir
+ files = [f for f in os.listdir(outdirpath) if f.endswith(extension)]
+ for filename in files:
+ try:
+ fpath = os.path.join(outdirpath, filename)
+ with open(fpath, 'rb') as keyfile:
+ if extension == u".k4i":
+ new_key_value = json.loads(keyfile.read())
+ else:
+ new_key_value = keyfile.read()
+ winekeys.append(new_key_value)
+ except:
+ print u"{0} v{1}: Error loading file {2}".format(PLUGIN_NAME, PLUGIN_VERSION, filename)
+ traceback.print_exc()
+ os.remove(fpath)
+ print u"{0} v{1}: Found and decrypted {2} {3}".format(PLUGIN_NAME, PLUGIN_VERSION, len(winekeys), u"key file" if len(winekeys) == 1 else u"key files")
+ return winekeys
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/zipfilerugged.py b/DeDRM_calibre_plugin/DeDRM_plugin/zipfilerugged.py
index c730607..4a55a69 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/zipfilerugged.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/zipfilerugged.py
@@ -1,39 +1,1400 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-from __future__ import with_statement
-
-__license__ = 'GPL v3'
-
-DETAILED_MESSAGE = \
-'You have personal information stored in this plugin\'s customization '+ \
-'string from a previous version of this plugin.\n\n'+ \
-'This new version of the plugin can convert that info '+ \
-'into key data that the new plugin can then use (which doesn\'t '+ \
-'require personal information to be stored/displayed in an insecure '+ \
-'manner like the old plugin did).\n\nIf you choose NOT to migrate this data at this time '+ \
-'you will be prompted to save that personal data to a file elsewhere; and you\'ll have '+ \
-'to manually re-configure this plugin with your information.\n\nEither way... ' + \
-'this new version of the plugin will not be responsible for storing that personal '+ \
-'info in plain sight any longer.'
-
-def uStrCmp (s1, s2, caseless=False):
- import unicodedata as ud
- str1 = s1 if isinstance(s1, unicode) else unicode(s1)
- str2 = s2 if isinstance(s2, unicode) else unicode(s2)
- if caseless:
- return ud.normalize('NFC', str1.lower()) == ud.normalize('NFC', str2.lower())
- else:
- return ud.normalize('NFC', str1) == ud.normalize('NFC', str2)
-
-def parseCustString(keystuff):
- userkeys = []
- ar = keystuff.split(':')
- for i in ar:
- try:
- name, ccn = i.split(',')
- # Generate Barnes & Noble EPUB user key from name and credit card number.
- userkeys.append(generate_key(name, ccn))
- except:
+"""
+Read and write ZIP files.
+"""
+import struct, os, time, sys, shutil
+import binascii, cStringIO, stat
+import io
+import re
+
+try:
+ import zlib # We may need its compression method
+ crc32 = zlib.crc32
+except ImportError:
+ zlib = None
+ crc32 = binascii.crc32
+
+__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
+ "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
+
+class BadZipfile(Exception):
+ pass
+
+
+class LargeZipFile(Exception):
+ """
+ Raised when writing a zipfile, the zipfile requires ZIP64 extensions
+ and those extensions are disabled.
+ """
+
+error = BadZipfile # The exception raised by this module
+
+ZIP64_LIMIT = (1 << 31) - 1
+ZIP_FILECOUNT_LIMIT = 1 << 16
+ZIP_MAX_COMMENT = (1 << 16) - 1
+
+# constants for Zip file compression methods
+ZIP_STORED = 0
+ZIP_DEFLATED = 8
+# Other ZIP compression methods not supported
+
+# Below are some formats and associated data for reading/writing headers using
+# the struct module. The names and structures of headers/records are those used
+# in the PKWARE description of the ZIP file format:
+# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
+# (URL valid as of January 2008)
+
+# The "end of central directory" structure, magic number, size, and indices
+# (section V.I in the format document)
+structEndArchive = "<4s4H2LH"
+stringEndArchive = "PK\005\006"
+sizeEndCentDir = struct.calcsize(structEndArchive)
+
+_ECD_SIGNATURE = 0
+_ECD_DISK_NUMBER = 1
+_ECD_DISK_START = 2
+_ECD_ENTRIES_THIS_DISK = 3
+_ECD_ENTRIES_TOTAL = 4
+_ECD_SIZE = 5
+_ECD_OFFSET = 6
+_ECD_COMMENT_SIZE = 7
+# These last two indices are not part of the structure as defined in the
+# spec, but they are used internally by this module as a convenience
+_ECD_COMMENT = 8
+_ECD_LOCATION = 9
+
+# The "central directory" structure, magic number, size, and indices
+# of entries in the structure (section V.F in the format document)
+structCentralDir = "<4s4B4HL2L5H2L"
+stringCentralDir = "PK\001\002"
+sizeCentralDir = struct.calcsize(structCentralDir)
+
+# indexes of entries in the central directory structure
+_CD_SIGNATURE = 0
+_CD_CREATE_VERSION = 1
+_CD_CREATE_SYSTEM = 2
+_CD_EXTRACT_VERSION = 3
+_CD_EXTRACT_SYSTEM = 4
+_CD_FLAG_BITS = 5
+_CD_COMPRESS_TYPE = 6
+_CD_TIME = 7
+_CD_DATE = 8
+_CD_CRC = 9
+_CD_COMPRESSED_SIZE = 10
+_CD_UNCOMPRESSED_SIZE = 11
+_CD_FILENAME_LENGTH = 12
+_CD_EXTRA_FIELD_LENGTH = 13
+_CD_COMMENT_LENGTH = 14
+_CD_DISK_NUMBER_START = 15
+_CD_INTERNAL_FILE_ATTRIBUTES = 16
+_CD_EXTERNAL_FILE_ATTRIBUTES = 17
+_CD_LOCAL_HEADER_OFFSET = 18
+
+# The "local file header" structure, magic number, size, and indices
+# (section V.A in the format document)
+structFileHeader = "<4s2B4HL2L2H"
+stringFileHeader = "PK\003\004"
+sizeFileHeader = struct.calcsize(structFileHeader)
+
+_FH_SIGNATURE = 0
+_FH_EXTRACT_VERSION = 1
+_FH_EXTRACT_SYSTEM = 2
+_FH_GENERAL_PURPOSE_FLAG_BITS = 3
+_FH_COMPRESSION_METHOD = 4
+_FH_LAST_MOD_TIME = 5
+_FH_LAST_MOD_DATE = 6
+_FH_CRC = 7
+_FH_COMPRESSED_SIZE = 8
+_FH_UNCOMPRESSED_SIZE = 9
+_FH_FILENAME_LENGTH = 10
+_FH_EXTRA_FIELD_LENGTH = 11
+
+# The "Zip64 end of central directory locator" structure, magic number, and size
+structEndArchive64Locator = "<4sLQL"
+stringEndArchive64Locator = "PK\x06\x07"
+sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
+
+# The "Zip64 end of central directory" record, magic number, size, and indices
+# (section V.G in the format document)
+structEndArchive64 = "<4sQ2H2L4Q"
+stringEndArchive64 = "PK\x06\x06"
+sizeEndCentDir64 = struct.calcsize(structEndArchive64)
+
+_CD64_SIGNATURE = 0
+_CD64_DIRECTORY_RECSIZE = 1
+_CD64_CREATE_VERSION = 2
+_CD64_EXTRACT_VERSION = 3
+_CD64_DISK_NUMBER = 4
+_CD64_DISK_NUMBER_START = 5
+_CD64_NUMBER_ENTRIES_THIS_DISK = 6
+_CD64_NUMBER_ENTRIES_TOTAL = 7
+_CD64_DIRECTORY_SIZE = 8
+_CD64_OFFSET_START_CENTDIR = 9
+
+def _check_zipfile(fp):
+ try:
+ if _EndRecData(fp):
+ return True # file has correct magic number
+ except IOError:
+ pass
+ return False
+
+def is_zipfile(filename):
+ """Quickly see if a file is a ZIP file by checking the magic number.
+
+ The filename argument may be a file or file-like object too.
+ """
+ result = False
+ try:
+ if hasattr(filename, "read"):
+ result = _check_zipfile(fp=filename)
+ else:
+ with open(filename, "rb") as fp:
+ result = _check_zipfile(fp)
+ except IOError:
+ pass
+ return result
+
+def _EndRecData64(fpin, offset, endrec):
+ """
+ Read the ZIP64 end-of-archive records and use that to update endrec
+ """
+ fpin.seek(offset - sizeEndCentDir64Locator, 2)
+ data = fpin.read(sizeEndCentDir64Locator)
+ sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
+ if sig != stringEndArchive64Locator:
+ return endrec
+
+ if diskno != 0 or disks != 1:
+ raise BadZipfile("zipfiles that span multiple disks are not supported")
+
+ # Assume no 'zip64 extensible data'
+ fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
+ data = fpin.read(sizeEndCentDir64)
+ sig, sz, create_version, read_version, disk_num, disk_dir, \
+ dircount, dircount2, dirsize, diroffset = \
+ struct.unpack(structEndArchive64, data)
+ if sig != stringEndArchive64:
+ return endrec
+
+ # Update the original endrec using data from the ZIP64 record
+ endrec[_ECD_SIGNATURE] = sig
+ endrec[_ECD_DISK_NUMBER] = disk_num
+ endrec[_ECD_DISK_START] = disk_dir
+ endrec[_ECD_ENTRIES_THIS_DISK] = dircount
+ endrec[_ECD_ENTRIES_TOTAL] = dircount2
+ endrec[_ECD_SIZE] = dirsize
+ endrec[_ECD_OFFSET] = diroffset
+ return endrec
+
+
+def _EndRecData(fpin):
+ """Return data from the "End of Central Directory" record, or None.
+
+ The data is a list of the nine items in the ZIP "End of central dir"
+ record followed by a tenth item, the file seek offset of this record."""
+
+ # Determine file size
+ fpin.seek(0, 2)
+ filesize = fpin.tell()
+
+ # Check to see if this is ZIP file with no archive comment (the
+ # "end of central directory" structure should be the last item in the
+ # file if this is the case).
+ try:
+ fpin.seek(-sizeEndCentDir, 2)
+ except IOError:
+ return None
+ data = fpin.read()
+ if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
+ # the signature is correct and there's no comment, unpack structure
+ endrec = struct.unpack(structEndArchive, data)
+ endrec=list(endrec)
+
+ # Append a blank comment and record start offset
+ endrec.append("")
+ endrec.append(filesize - sizeEndCentDir)
+
+ # Try to read the "Zip64 end of central directory" structure
+ return _EndRecData64(fpin, -sizeEndCentDir, endrec)
+
+ # Either this is not a ZIP file, or it is a ZIP file with an archive
+ # comment. Search the end of the file for the "end of central directory"
+ # record signature. The comment is the last item in the ZIP file and may be
+ # up to 64K long. It is assumed that the "end of central directory" magic
+ # number does not appear in the comment.
+ maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
+ fpin.seek(maxCommentStart, 0)
+ data = fpin.read()
+ start = data.rfind(stringEndArchive)
+ if start >= 0:
+ # found the magic number; attempt to unpack and interpret
+ recData = data[start:start+sizeEndCentDir]
+ endrec = list(struct.unpack(structEndArchive, recData))
+ comment = data[start+sizeEndCentDir:]
+ # check that comment length is correct
+ if endrec[_ECD_COMMENT_SIZE] == len(comment):
+ # Append the archive comment and start offset
+ endrec.append(comment)
+ endrec.append(maxCommentStart + start)
+
+ # Try to read the "Zip64 end of central directory" structure
+ return _EndRecData64(fpin, maxCommentStart + start - filesize,
+ endrec)
+
+ # Unable to find a valid end of central directory structure
+ return
+
+
+class ZipInfo (object):
+ """Class with attributes describing each file in the ZIP archive."""
+
+ __slots__ = (
+ 'orig_filename',
+ 'filename',
+ 'date_time',
+ 'compress_type',
+ 'comment',
+ 'extra',
+ 'create_system',
+ 'create_version',
+ 'extract_version',
+ 'reserved',
+ 'flag_bits',
+ 'volume',
+ 'internal_attr',
+ 'external_attr',
+ 'header_offset',
+ 'CRC',
+ 'compress_size',
+ 'file_size',
+ '_raw_time',
+ )
+
+ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
+ self.orig_filename = filename # Original file name in archive
+
+ # Terminate the file name at the first null byte. Null bytes in file
+ # names are used as tricks by viruses in archives.
+ null_byte = filename.find(chr(0))
+ if null_byte >= 0:
+ filename = filename[0:null_byte]
+ # This is used to ensure paths in generated ZIP files always use
+ # forward slashes as the directory separator, as required by the
+ # ZIP format specification.
+ if os.sep != "/" and os.sep in filename:
+ filename = filename.replace(os.sep, "/")
+
+ self.filename = filename # Normalized file name
+ self.date_time = date_time # year, month, day, hour, min, sec
+ # Standard values:
+ self.compress_type = ZIP_STORED # Type of compression for the file
+ self.comment = "" # Comment for each file
+ self.extra = "" # ZIP extra data
+ if sys.platform == 'win32':
+ self.create_system = 0 # System which created ZIP archive
+ else:
+ # Assume everything else is unix-y
+ self.create_system = 3 # System which created ZIP archive
+ self.create_version = 20 # Version which created ZIP archive
+ self.extract_version = 20 # Version needed to extract archive
+ self.reserved = 0 # Must be zero
+ self.flag_bits = 0 # ZIP flag bits
+ self.volume = 0 # Volume number of file header
+ self.internal_attr = 0 # Internal attributes
+ self.external_attr = 0 # External file attributes
+ # Other attributes are set by class ZipFile:
+ # header_offset Byte offset to the file header
+ # CRC CRC-32 of the uncompressed file
+ # compress_size Size of the compressed file
+ # file_size Size of the uncompressed file
+
+ def FileHeader(self):
+ """Return the per-file header as a string."""
+ dt = self.date_time
+ dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
+ dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
+ if self.flag_bits & 0x08:
+ # Set these to zero because we write them after the file data
+ CRC = compress_size = file_size = 0
+ else:
+ CRC = self.CRC
+ compress_size = self.compress_size
+ file_size = self.file_size
+
+ extra = self.extra
+
+ if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
+ # File is larger than what fits into a 4 byte integer,
+ # fall back to the ZIP64 extension
+ fmt = '<HHQQ'
+ extra = extra + struct.pack(fmt,
+ 1, struct.calcsize(fmt)-4, file_size, compress_size)
+ file_size = 0xffffffff
+ compress_size = 0xffffffff
+ self.extract_version = max(45, self.extract_version)
+ self.create_version = max(45, self.extract_version)
+
+ filename, flag_bits = self._encodeFilenameFlags()
+ header = struct.pack(structFileHeader, stringFileHeader,
+ self.extract_version, self.reserved, flag_bits,
+ self.compress_type, dostime, dosdate, CRC,
+ compress_size, file_size,
+ len(filename), len(extra))
+ return header + filename + extra
+
+ def _encodeFilenameFlags(self):
+ if isinstance(self.filename, unicode):
+ try:
+ return self.filename.encode('ascii'), self.flag_bits
+ except UnicodeEncodeError:
+ return self.filename.encode('utf-8'), self.flag_bits | 0x800
+ else:
+ return self.filename, self.flag_bits
+
+ def _decodeFilename(self):
+ if self.flag_bits & 0x800:
+ try:
+ #print "decoding filename",self.filename
+ return self.filename.decode('utf-8')
+ except:
+ return self.filename
+ else:
+ return self.filename
+
+ def _decodeExtra(self):
+ # Try to decode the extra field.
+ extra = self.extra
+ unpack = struct.unpack
+ while extra:
+ tp, ln = unpack('<HH', extra[:4])
+ if tp == 1:
+ if ln >= 24:
+ counts = unpack('<QQQ', extra[4:28])
+ elif ln == 16:
+ counts = unpack('<QQ', extra[4:20])
+ elif ln == 8:
+ counts = unpack('<Q', extra[4:12])
+ elif ln == 0:
+ counts = ()
+ else:
+ raise RuntimeError, "Corrupt extra field %s"%(ln,)
+
+ idx = 0
+
+ # ZIP64 extension (large files and/or large archives)
+ if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
+ self.file_size = counts[idx]
+ idx += 1
+
+ if self.compress_size == 0xFFFFFFFFL:
+ self.compress_size = counts[idx]
+ idx += 1
+
+ if self.header_offset == 0xffffffffL:
+ old = self.header_offset
+ self.header_offset = counts[idx]
+ idx+=1
+
+ extra = extra[ln+4:]
+
+
+class _ZipDecrypter:
+ """Class to handle decryption of files stored within a ZIP archive.
+
+ ZIP supports a password-based form of encryption. Even though known
+ plaintext attacks have been found against it, it is still useful
+ to be able to get data out of such a file.
+
+ Usage:
+ zd = _ZipDecrypter(mypwd)
+ plain_char = zd(cypher_char)
+ plain_text = map(zd, cypher_text)
+ """
+
+ def _GenerateCRCTable():
+ """Generate a CRC-32 table.
+
+ ZIP encryption uses the CRC32 one-byte primitive for scrambling some
+ internal keys. We noticed that a direct implementation is faster than
+ relying on binascii.crc32().
+ """
+ poly = 0xedb88320
+ table = [0] * 256
+ for i in range(256):
+ crc = i
+ for j in range(8):
+ if crc & 1:
+ crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
+ else:
+ crc = ((crc >> 1) & 0x7FFFFFFF)
+ table[i] = crc
+ return table
+ crctable = _GenerateCRCTable()
+
+ def _crc32(self, ch, crc):
+ """Compute the CRC32 primitive on one byte."""
+ return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
+
+ def __init__(self, pwd):
+ self.key0 = 305419896
+ self.key1 = 591751049
+ self.key2 = 878082192
+ for p in pwd:
+ self._UpdateKeys(p)
+
+ def _UpdateKeys(self, c):
+ self.key0 = self._crc32(c, self.key0)
+ self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
+ self.key1 = (self.key1 * 134775813 + 1) & 4294967295
+ self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
+
+ def __call__(self, c):
+ """Decrypt a single character."""
+ c = ord(c)
+ k = self.key2 | 2
+ c = c ^ (((k * (k^1)) >> 8) & 255)
+ c = chr(c)
+ self._UpdateKeys(c)
+ return c
+
+class ZipExtFile(io.BufferedIOBase):
+ """File-like object for reading an archive member.
+ Is returned by ZipFile.open().
+ """
+
+ # Max size supported by decompressor.
+ MAX_N = 1 << 31 - 1
+
+ # Read from compressed files in 4k blocks.
+ MIN_READ_SIZE = 4096
+
+ # Search for universal newlines or line chunks.
+ PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
+
+ def __init__(self, fileobj, mode, zipinfo, decrypter=None):
+ self._fileobj = fileobj
+ self._decrypter = decrypter
+
+ self._compress_type = zipinfo.compress_type
+ self._compress_size = zipinfo.compress_size
+ self._compress_left = zipinfo.compress_size
+
+ if self._compress_type == ZIP_DEFLATED:
+ self._decompressor = zlib.decompressobj(-15)
+ self._unconsumed = ''
+
+ self._readbuffer = ''
+ self._offset = 0
+
+ self._universal = 'U' in mode
+ self.newlines = None
+
+ # Adjust read size for encrypted files since the first 12 bytes
+ # are for the encryption/password information.
+ if self._decrypter is not None:
+ self._compress_left -= 12
+
+ self.mode = mode
+ self.name = zipinfo.filename
+
+ def readline(self, limit=-1):
+ """Read and return a line from the stream.
+
+ If limit is specified, at most limit bytes will be read.
+ """
+
+ if not self._universal and limit < 0:
+ # Shortcut common case - newline found in buffer.
+ i = self._readbuffer.find('\n', self._offset) + 1
+ if i > 0:
+ line = self._readbuffer[self._offset: i]
+ self._offset = i
+ return line
+
+ if not self._universal:
+ return io.BufferedIOBase.readline(self, limit)
+
+ line = ''
+ while limit < 0 or len(line) < limit:
+ readahead = self.peek(2)
+ if readahead == '':
+ return line
+
+ #
+ # Search for universal newlines or line chunks.
+ #
+ # The pattern returns either a line chunk or a newline, but not
+ # both. Combined with peek(2), we are assured that the sequence
+ # '\r\n' is always retrieved completely and never split into
+ # separate newlines - '\r', '\n' due to coincidental readaheads.
+ #
+ match = self.PATTERN.search(readahead)
+ newline = match.group('newline')
+ if newline is not None:
+ if self.newlines is None:
+ self.newlines = []
+ if newline not in self.newlines:
+ self.newlines.append(newline)
+ self._offset += len(newline)
+ return line + '\n'
+
+ chunk = match.group('chunk')
+ if limit >= 0:
+ chunk = chunk[: limit - len(line)]
+
+ self._offset += len(chunk)
+ line += chunk
+
+ return line
+
+ def peek(self, n=1):
+ """Returns buffered bytes without advancing the position."""
+ if n > len(self._readbuffer) - self._offset:
+ chunk = self.read(n)
+ self._offset -= len(chunk)
+
+ # Return up to 512 bytes to reduce allocation overhead for tight loops.
+ return self._readbuffer[self._offset: self._offset + 512]
+
+ def readable(self):
+ return True
+
+ def read(self, n=-1):
+ """Read and return up to n bytes.
+ If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
+ """
+
+ buf = ''
+ while n < 0 or n is None or n > len(buf):
+ data = self.read1(n)
+ if len(data) == 0:
+ return buf
+
+ buf += data
+
+ return buf
+
+ def read1(self, n):
+ """Read up to n bytes with at most one read() system call."""
+
+ # Simplify algorithm (branching) by transforming negative n to large n.
+ if n < 0 or n is None:
+ n = self.MAX_N
+
+ # Bytes available in read buffer.
+ len_readbuffer = len(self._readbuffer) - self._offset
+
+ # Read from file.
+ if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
+ nbytes = n - len_readbuffer - len(self._unconsumed)
+ nbytes = max(nbytes, self.MIN_READ_SIZE)
+ nbytes = min(nbytes, self._compress_left)
+
+ data = self._fileobj.read(nbytes)
+ self._compress_left -= len(data)
+
+ if data and self._decrypter is not None:
+ data = ''.join(map(self._decrypter, data))
+
+ if self._compress_type == ZIP_STORED:
+ self._readbuffer = self._readbuffer[self._offset:] + data
+ self._offset = 0
+ else:
+ # Prepare deflated bytes for decompression.
+ self._unconsumed += data
+
+ # Handle unconsumed data.
+ if (len(self._unconsumed) > 0 and n > len_readbuffer and
+ self._compress_type == ZIP_DEFLATED):
+ data = self._decompressor.decompress(
+ self._unconsumed,
+ max(n - len_readbuffer, self.MIN_READ_SIZE)
+ )
+
+ self._unconsumed = self._decompressor.unconsumed_tail
+ if len(self._unconsumed) == 0 and self._compress_left == 0:
+ data += self._decompressor.flush()
+
+ self._readbuffer = self._readbuffer[self._offset:] + data
+ self._offset = 0
+
+ # Read from buffer.
+ data = self._readbuffer[self._offset: self._offset + n]
+ self._offset += len(data)
+ return data
+
+
+
+class ZipFile:
+ """ Class with methods to open, read, write, close, list zip files.
+
+ z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
+
+ file: Either the path to the file, or a file-like object.
+ If it is a path, the file will be opened and closed by ZipFile.
+ mode: The mode can be either read "r", write "w" or append "a".
+ compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
+ allowZip64: if True ZipFile will create files with ZIP64 extensions when
+ needed, otherwise it will raise an exception when this would
+ be necessary.
+
+ """
+
+ fp = None # Set here since __del__ checks it
+
+ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
+ """Open the ZIP file with mode read "r", write "w" or append "a"."""
+ if mode not in ("r", "w", "a"):
+ raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
+
+ if compression == ZIP_STORED:
pass
- return userkeys
+ elif compression == ZIP_DEFLATED:
+ if not zlib:
+ raise RuntimeError,\
+ "Compression requires the (missing) zlib module"
+ else:
+ raise RuntimeError, "That compression method is not supported"
+
+ self._allowZip64 = allowZip64
+ self._didModify = False
+ self.debug = 0 # Level of printing: 0 through 3
+ self.NameToInfo = {} # Find file info given name
+ self.filelist = [] # List of ZipInfo instances for archive
+ self.compression = compression # Method of compression
+ self.mode = key = mode.replace('b', '')[0]
+ self.pwd = None
+ self.comment = ''
+
+ # Check if we were passed a file-like object
+ if isinstance(file, basestring):
+ self._filePassed = 0
+ self.filename = file
+ modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
+ try:
+ self.fp = open(file, modeDict[mode])
+ except IOError:
+ if mode == 'a':
+ mode = key = 'w'
+ self.fp = open(file, modeDict[mode])
+ else:
+ raise
+ else:
+ self._filePassed = 1
+ self.fp = file
+ self.filename = getattr(file, 'name', None)
+
+ if key == 'r':
+ self._GetContents()
+ elif key == 'w':
+ pass
+ elif key == 'a':
+ try: # See if file is a zip file
+ self._RealGetContents()
+ # seek to start of directory and overwrite
+ self.fp.seek(self.start_dir, 0)
+ except BadZipfile: # file is not a zip file, just append
+ self.fp.seek(0, 2)
+ else:
+ if not self._filePassed:
+ self.fp.close()
+ self.fp = None
+ raise RuntimeError, 'Mode must be "r", "w" or "a"'
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, traceback):
+ self.close()
+
+ def _GetContents(self):
+ """Read the directory, making sure we close the file if the format
+ is bad."""
+ try:
+ self._RealGetContents()
+ except BadZipfile:
+ if not self._filePassed:
+ self.fp.close()
+ self.fp = None
+ raise
+
+ def _RealGetContents(self):
+ """Read in the table of contents for the ZIP file."""
+ fp = self.fp
+ endrec = _EndRecData(fp)
+ if not endrec:
+ raise BadZipfile, "File is not a zip file"
+ if self.debug > 1:
+ print endrec
+ size_cd = endrec[_ECD_SIZE] # bytes in central directory
+ offset_cd = endrec[_ECD_OFFSET] # offset of central directory
+ self.comment = endrec[_ECD_COMMENT] # archive comment
+
+ # "concat" is zero, unless zip was concatenated to another file
+ concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
+ if endrec[_ECD_SIGNATURE] == stringEndArchive64:
+ # If Zip64 extension structures are present, account for them
+ concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
+
+ if self.debug > 2:
+ inferred = concat + offset_cd
+ print "given, inferred, offset", offset_cd, inferred, concat
+ # self.start_dir: Position of start of central directory
+ self.start_dir = offset_cd + concat
+ fp.seek(self.start_dir, 0)
+ data = fp.read(size_cd)
+ fp = cStringIO.StringIO(data)
+ total = 0
+ while total < size_cd:
+ centdir = fp.read(sizeCentralDir)
+ if centdir[0:4] != stringCentralDir:
+ raise BadZipfile, "Bad magic number for central directory"
+ centdir = struct.unpack(structCentralDir, centdir)
+ if self.debug > 2:
+ print centdir
+ filename = fp.read(centdir[_CD_FILENAME_LENGTH])
+ # Create ZipInfo instance to store file information
+ x = ZipInfo(filename)
+ x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
+ x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
+ x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
+ (x.create_version, x.create_system, x.extract_version, x.reserved,
+ x.flag_bits, x.compress_type, t, d,
+ x.CRC, x.compress_size, x.file_size) = centdir[1:12]
+ x.volume, x.internal_attr, x.external_attr = centdir[15:18]
+ # Convert date/time code to (year, month, day, hour, min, sec)
+ x._raw_time = t
+ x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
+ t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
+
+ x._decodeExtra()
+ x.header_offset = x.header_offset + concat
+ x.filename = x._decodeFilename()
+ self.filelist.append(x)
+ self.NameToInfo[x.filename] = x
+
+ # update total bytes read from central directory
+ total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
+ + centdir[_CD_EXTRA_FIELD_LENGTH]
+ + centdir[_CD_COMMENT_LENGTH])
+
+ if self.debug > 2:
+ print "total", total
+
+
+ def namelist(self):
+ """Return a list of file names in the archive."""
+ l = []
+ for data in self.filelist:
+ l.append(data.filename)
+ return l
+
+ def infolist(self):
+ """Return a list of class ZipInfo instances for files in the
+ archive."""
+ return self.filelist
+
+ def printdir(self):
+ """Print a table of contents for the zip file."""
+ print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
+ for zinfo in self.filelist:
+ date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
+ print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
+
+ def testzip(self):
+ """Read all the files and check the CRC."""
+ chunk_size = 2 ** 20
+ for zinfo in self.filelist:
+ try:
+ # Read by chunks, to avoid an OverflowError or a
+ # MemoryError with very large embedded files.
+ f = self.open(zinfo.filename, "r")
+ while f.read(chunk_size): # Check CRC-32
+ pass
+ except BadZipfile:
+ return zinfo.filename
+
+ def getinfo(self, name):
+ """Return the instance of ZipInfo given 'name'."""
+ info = self.NameToInfo.get(name)
+ if info is None:
+ raise KeyError(
+ 'There is no item named %r in the archive' % name)
+
+ return info
+
+ def setpassword(self, pwd):
+ """Set default password for encrypted files."""
+ self.pwd = pwd
+
+ def read(self, name, pwd=None):
+ """Return file bytes (as a string) for name."""
+ return self.open(name, "r", pwd).read()
+
+ def open(self, name, mode="r", pwd=None):
+ """Return file-like object for 'name'."""
+ if mode not in ("r", "U", "rU"):
+ raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
+ if not self.fp:
+ raise RuntimeError, \
+ "Attempt to read ZIP archive that was already closed"
+
+ # Only open a new file for instances where we were not
+ # given a file object in the constructor
+ if self._filePassed:
+ zef_file = self.fp
+ else:
+ zef_file = open(self.filename, 'rb')
+
+ # Make sure we have an info object
+ if isinstance(name, ZipInfo):
+ # 'name' is already an info object
+ zinfo = name
+ else:
+ # Get info object for name
+ zinfo = self.getinfo(name)
+
+ zef_file.seek(zinfo.header_offset, 0)
+
+ # Skip the file header:
+ fheader = zef_file.read(sizeFileHeader)
+ if fheader[0:4] != stringFileHeader:
+ raise BadZipfile, "Bad magic number for file header"
+
+ fheader = struct.unpack(structFileHeader, fheader)
+ fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
+ if fheader[_FH_EXTRA_FIELD_LENGTH]:
+ zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
+
+ if fname != zinfo.orig_filename:
+ raise BadZipfile, \
+ 'File name in directory "%s" and header "%s" differ.' % (
+ zinfo.orig_filename, fname)
+
+ # check for encrypted flag & handle password
+ is_encrypted = zinfo.flag_bits & 0x1
+ zd = None
+ if is_encrypted:
+ if not pwd:
+ pwd = self.pwd
+ if not pwd:
+ raise RuntimeError, "File %s is encrypted, " \
+ "password required for extraction" % name
+
+ zd = _ZipDecrypter(pwd)
+ # The first 12 bytes in the cypher stream is an encryption header
+ # used to strengthen the algorithm. The first 11 bytes are
+ # completely random, while the 12th contains the MSB of the CRC,
+ # or the MSB of the file time depending on the header type
+ # and is used to check the correctness of the password.
+ bytes = zef_file.read(12)
+ h = map(zd, bytes[0:12])
+ if zinfo.flag_bits & 0x8:
+ # compare against the file type from extended local headers
+ check_byte = (zinfo._raw_time >> 8) & 0xff
+ else:
+ # compare against the CRC otherwise
+ check_byte = (zinfo.CRC >> 24) & 0xff
+ if ord(h[11]) != check_byte:
+ raise RuntimeError("Bad password for file", name)
+
+ return ZipExtFile(zef_file, mode, zinfo, zd)
+
+ def extract(self, member, path=None, pwd=None):
+ """Extract a member from the archive to the current working directory,
+ using its full name. Its file information is extracted as accurately
+ as possible. `member' may be a filename or a ZipInfo object. You can
+ specify a different directory using `path'.
+ """
+ if not isinstance(member, ZipInfo):
+ member = self.getinfo(member)
+
+ if path is None:
+ path = os.getcwd()
+
+ return self._extract_member(member, path, pwd)
+
+ def extractall(self, path=None, members=None, pwd=None):
+ """Extract all members from the archive to the current working
+ directory. `path' specifies a different directory to extract to.
+ `members' is optional and must be a subset of the list returned
+ by namelist().
+ """
+ if members is None:
+ members = self.namelist()
+
+ for zipinfo in members:
+ self.extract(zipinfo, path, pwd)
+
+ def _extract_member(self, member, targetpath, pwd):
+ """Extract the ZipInfo object 'member' to a physical
+ file on the path targetpath.
+ """
+ # build the destination pathname, replacing
+ # forward slashes to platform specific separators.
+ # Strip trailing path separator, unless it represents the root.
+ if (targetpath[-1:] in (os.path.sep, os.path.altsep)
+ and len(os.path.splitdrive(targetpath)[1]) > 1):
+ targetpath = targetpath[:-1]
+
+ # don't include leading "/" from file name if present
+ if member.filename[0] == '/':
+ targetpath = os.path.join(targetpath, member.filename[1:])
+ else:
+ targetpath = os.path.join(targetpath, member.filename)
+
+ targetpath = os.path.normpath(targetpath)
+
+ # Create all upper directories if necessary.
+ upperdirs = os.path.dirname(targetpath)
+ if upperdirs and not os.path.exists(upperdirs):
+ os.makedirs(upperdirs)
+
+ if member.filename[-1] == '/':
+ if not os.path.isdir(targetpath):
+ os.mkdir(targetpath)
+ return targetpath
+
+ source = self.open(member, pwd=pwd)
+ target = file(targetpath, "wb")
+ shutil.copyfileobj(source, target)
+ source.close()
+ target.close()
+
+ return targetpath
+
+ def _writecheck(self, zinfo):
+ """Check for errors before writing a file to the archive."""
+ if zinfo.filename in self.NameToInfo:
+ if self.debug: # Warning for duplicate names
+ print "Duplicate name:", zinfo.filename
+ if self.mode not in ("w", "a"):
+ raise RuntimeError, 'write() requires mode "w" or "a"'
+ if not self.fp:
+ raise RuntimeError, \
+ "Attempt to write ZIP archive that was already closed"
+ if zinfo.compress_type == ZIP_DEFLATED and not zlib:
+ raise RuntimeError, \
+ "Compression requires the (missing) zlib module"
+ if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
+ raise RuntimeError, \
+ "That compression method is not supported"
+ if zinfo.file_size > ZIP64_LIMIT:
+ if not self._allowZip64:
+ raise LargeZipFile("Filesize would require ZIP64 extensions")
+ if zinfo.header_offset > ZIP64_LIMIT:
+ if not self._allowZip64:
+ raise LargeZipFile("Zipfile size would require ZIP64 extensions")
+
+ def write(self, filename, arcname=None, compress_type=None):
+ """Put the bytes from filename into the archive under the name
+ arcname."""
+ if not self.fp:
+ raise RuntimeError(
+ "Attempt to write to ZIP archive that was already closed")
+
+ st = os.stat(filename)
+ isdir = stat.S_ISDIR(st.st_mode)
+ mtime = time.localtime(st.st_mtime)
+ date_time = mtime[0:6]
+ # Create ZipInfo instance to store file information
+ if arcname is None:
+ arcname = filename
+ arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
+ while arcname[0] in (os.sep, os.altsep):
+ arcname = arcname[1:]
+ if isdir:
+ arcname += '/'
+ zinfo = ZipInfo(arcname, date_time)
+ zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
+ if compress_type is None:
+ zinfo.compress_type = self.compression
+ else:
+ zinfo.compress_type = compress_type
+
+ zinfo.file_size = st.st_size
+ zinfo.flag_bits = 0x00
+ zinfo.header_offset = self.fp.tell() # Start of header bytes
+
+ self._writecheck(zinfo)
+ self._didModify = True
+
+ if isdir:
+ zinfo.file_size = 0
+ zinfo.compress_size = 0
+ zinfo.CRC = 0
+ self.filelist.append(zinfo)
+ self.NameToInfo[zinfo.filename] = zinfo
+ self.fp.write(zinfo.FileHeader())
+ return
+
+ with open(filename, "rb") as fp:
+ # Must overwrite CRC and sizes with correct data later
+ zinfo.CRC = CRC = 0
+ zinfo.compress_size = compress_size = 0
+ zinfo.file_size = file_size = 0
+ self.fp.write(zinfo.FileHeader())
+ if zinfo.compress_type == ZIP_DEFLATED:
+ cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
+ zlib.DEFLATED, -15)
+ else:
+ cmpr = None
+ while 1:
+ buf = fp.read(1024 * 8)
+ if not buf:
+ break
+ file_size = file_size + len(buf)
+ CRC = crc32(buf, CRC) & 0xffffffff
+ if cmpr:
+ buf = cmpr.compress(buf)
+ compress_size = compress_size + len(buf)
+ self.fp.write(buf)
+ if cmpr:
+ buf = cmpr.flush()
+ compress_size = compress_size + len(buf)
+ self.fp.write(buf)
+ zinfo.compress_size = compress_size
+ else:
+ zinfo.compress_size = file_size
+ zinfo.CRC = CRC
+ zinfo.file_size = file_size
+ # Seek backwards and write CRC and file sizes
+ position = self.fp.tell() # Preserve current position in file
+ self.fp.seek(zinfo.header_offset + 14, 0)
+ self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
+ zinfo.file_size))
+ self.fp.seek(position, 0)
+ self.filelist.append(zinfo)
+ self.NameToInfo[zinfo.filename] = zinfo
+
+ def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
+ """Write a file into the archive. The contents is the string
+ 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
+ the name of the file in the archive."""
+ if not isinstance(zinfo_or_arcname, ZipInfo):
+ zinfo = ZipInfo(filename=zinfo_or_arcname,
+ date_time=time.localtime(time.time())[:6])
+
+ zinfo.compress_type = self.compression
+ zinfo.external_attr = 0600 << 16
+ else:
+ zinfo = zinfo_or_arcname
+
+ if not self.fp:
+ raise RuntimeError(
+ "Attempt to write to ZIP archive that was already closed")
+
+ if compress_type is not None:
+ zinfo.compress_type = compress_type
+
+ zinfo.file_size = len(bytes) # Uncompressed size
+ zinfo.header_offset = self.fp.tell() # Start of header bytes
+ self._writecheck(zinfo)
+ self._didModify = True
+ zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
+ if zinfo.compress_type == ZIP_DEFLATED:
+ co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
+ zlib.DEFLATED, -15)
+ bytes = co.compress(bytes) + co.flush()
+ zinfo.compress_size = len(bytes) # Compressed size
+ else:
+ zinfo.compress_size = zinfo.file_size
+ zinfo.header_offset = self.fp.tell() # Start of header bytes
+ self.fp.write(zinfo.FileHeader())
+ self.fp.write(bytes)
+ self.fp.flush()
+ if zinfo.flag_bits & 0x08:
+ # Write CRC and file sizes after the file data
+ self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
+ zinfo.file_size))
+ self.filelist.append(zinfo)
+ self.NameToInfo[zinfo.filename] = zinfo
+
+ def __del__(self):
+ """Call the "close()" method in case the user forgot."""
+ self.close()
+
+ def close(self):
+ """Close the file, and for mode "w" and "a" write the ending
+ records."""
+ if self.fp is None:
+ return
+
+ if self.mode in ("w", "a") and self._didModify: # write ending records
+ count = 0
+ pos1 = self.fp.tell()
+ for zinfo in self.filelist: # write central directory
+ count = count + 1
+ dt = zinfo.date_time
+ dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
+ dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
+ extra = []
+ if zinfo.file_size > ZIP64_LIMIT \
+ or zinfo.compress_size > ZIP64_LIMIT:
+ extra.append(zinfo.file_size)
+ extra.append(zinfo.compress_size)
+ file_size = 0xffffffff
+ compress_size = 0xffffffff
+ else:
+ file_size = zinfo.file_size
+ compress_size = zinfo.compress_size
+
+ if zinfo.header_offset > ZIP64_LIMIT:
+ extra.append(zinfo.header_offset)
+ header_offset = 0xffffffffL
+ else:
+ header_offset = zinfo.header_offset
+
+ extra_data = zinfo.extra
+ if extra:
+ # Append a ZIP64 field to the extra's
+ extra_data = struct.pack(
+ '<HH' + 'Q'*len(extra),
+ 1, 8*len(extra), *extra) + extra_data
+
+ extract_version = max(45, zinfo.extract_version)
+ create_version = max(45, zinfo.create_version)
+ else:
+ extract_version = zinfo.extract_version
+ create_version = zinfo.create_version
+
+ try:
+ filename, flag_bits = zinfo._encodeFilenameFlags()
+ centdir = struct.pack(structCentralDir,
+ stringCentralDir, create_version,
+ zinfo.create_system, extract_version, zinfo.reserved,
+ flag_bits, zinfo.compress_type, dostime, dosdate,
+ zinfo.CRC, compress_size, file_size,
+ len(filename), len(extra_data), len(zinfo.comment),
+ 0, zinfo.internal_attr, zinfo.external_attr,
+ header_offset)
+ except DeprecationWarning:
+ print >>sys.stderr, (structCentralDir,
+ stringCentralDir, create_version,
+ zinfo.create_system, extract_version, zinfo.reserved,
+ zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
+ zinfo.CRC, compress_size, file_size,
+ len(zinfo.filename), len(extra_data), len(zinfo.comment),
+ 0, zinfo.internal_attr, zinfo.external_attr,
+ header_offset)
+ raise
+ self.fp.write(centdir)
+ self.fp.write(filename)
+ self.fp.write(extra_data)
+ self.fp.write(zinfo.comment)
+
+ pos2 = self.fp.tell()
+ # Write end-of-zip-archive record
+ centDirCount = count
+ centDirSize = pos2 - pos1
+ centDirOffset = pos1
+ if (centDirCount >= ZIP_FILECOUNT_LIMIT or
+ centDirOffset > ZIP64_LIMIT or
+ centDirSize > ZIP64_LIMIT):
+ # Need to write the ZIP64 end-of-archive records
+ zip64endrec = struct.pack(
+ structEndArchive64, stringEndArchive64,
+ 44, 45, 45, 0, 0, centDirCount, centDirCount,
+ centDirSize, centDirOffset)
+ self.fp.write(zip64endrec)
+
+ zip64locrec = struct.pack(
+ structEndArchive64Locator,
+ stringEndArchive64Locator, 0, pos2, 1)
+ self.fp.write(zip64locrec)
+ centDirCount = min(centDirCount, 0xFFFF)
+ centDirSize = min(centDirSize, 0xFFFFFFFF)
+ centDirOffset = min(centDirOffset, 0xFFFFFFFF)
+
+ # check for valid comment length
+ if len(self.comment) >= ZIP_MAX_COMMENT:
+ if self.debug > 0:
+ msg = 'Archive comment is too long; truncating to %d bytes' \
+ % ZIP_MAX_COMMENT
+ self.comment = self.comment[:ZIP_MAX_COMMENT]
+
+ endrec = struct.pack(structEndArchive, stringEndArchive,
+ 0, 0, centDirCount, centDirCount,
+ centDirSize, centDirOffset, len(self.comment))
+ self.fp.write(endrec)
+ self.fp.write(self.comment)
+ self.fp.flush()
+
+ if not self._filePassed:
+ self.fp.close()
+ self.fp = None
+
+
+class PyZipFile(ZipFile):
+ """Class to create ZIP archives with Python library files and packages."""
+
+ def writepy(self, pathname, basename = ""):
+ """Add all files from "pathname" to the ZIP archive.
+
+ If pathname is a package directory, search the directory and
+ all package subdirectories recursively for all *.py and enter
+ the modules into the archive. If pathname is a plain
+ directory, listdir *.py and enter all modules. Else, pathname
+ must be a Python *.py file and the module will be put into the
+ archive. Added modules are always module.pyo or module.pyc.
+ This method will compile the module.py into module.pyc if
+ necessary.
+ """
+ dir, name = os.path.split(pathname)
+ if os.path.isdir(pathname):
+ initname = os.path.join(pathname, "__init__.py")
+ if os.path.isfile(initname):
+ # This is a package directory, add it
+ if basename:
+ basename = "%s/%s" % (basename, name)
+ else:
+ basename = name
+ if self.debug:
+ print "Adding package in", pathname, "as", basename
+ fname, arcname = self._get_codename(initname[0:-3], basename)
+ if self.debug:
+ print "Adding", arcname
+ self.write(fname, arcname)
+ dirlist = os.listdir(pathname)
+ dirlist.remove("__init__.py")
+ # Add all *.py files and package subdirectories
+ for filename in dirlist:
+ path = os.path.join(pathname, filename)
+ root, ext = os.path.splitext(filename)
+ if os.path.isdir(path):
+ if os.path.isfile(os.path.join(path, "__init__.py")):
+ # This is a package directory, add it
+ self.writepy(path, basename) # Recursive call
+ elif ext == ".py":
+ fname, arcname = self._get_codename(path[0:-3],
+ basename)
+ if self.debug:
+ print "Adding", arcname
+ self.write(fname, arcname)
+ else:
+ # This is NOT a package directory, add its files at top level
+ if self.debug:
+ print "Adding files from directory", pathname
+ for filename in os.listdir(pathname):
+ path = os.path.join(pathname, filename)
+ root, ext = os.path.splitext(filename)
+ if ext == ".py":
+ fname, arcname = self._get_codename(path[0:-3],
+ basename)
+ if self.debug:
+ print "Adding", arcname
+ self.write(fname, arcname)
+ else:
+ if pathname[-3:] != ".py":
+ raise RuntimeError, \
+ 'Files added with writepy() must end with ".py"'
+ fname, arcname = self._get_codename(pathname[0:-3], basename)
+ if self.debug:
+ print "Adding file", arcname
+ self.write(fname, arcname)
+
+ def _get_codename(self, pathname, basename):
+ """Return (filename, archivename) for the path.
+
+ Given a module name path, return the correct file path and
+ archive name, compiling if necessary. For example, given
+ /python/lib/string, return (/python/lib/string.pyc, string).
+ """
+ file_py = pathname + ".py"
+ file_pyc = pathname + ".pyc"
+ file_pyo = pathname + ".pyo"
+ if os.path.isfile(file_pyo) and \
+ os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
+ fname = file_pyo # Use .pyo file
+ elif not os.path.isfile(file_pyc) or \
+ os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
+ import py_compile
+ if self.debug:
+ print "Compiling", file_py
+ try:
+ py_compile.compile(file_py, file_pyc, None, True)
+ except py_compile.PyCompileError,err:
+ print err.msg
+ fname = file_pyc
+ else:
+ fname = file_pyc
+ archivename = os.path.split(fname)[1]
+ if basename:
+ archivename = "%s/%s" % (basename, archivename)
+ return (fname, archivename)
+
+
+def main(args = None):
+ import textwrap
+ USAGE=textwrap.dedent("""\
+ Usage:
+ zipfile.py -l zipfile.zip # Show listing of a zipfile
+ zipfile.py -t zipfile.zip # Test if a zipfile is valid
+ zipfile.py -e zipfile.zip target # Extract zipfile into target dir
+ zipfile.py -c zipfile.zip src ... # Create zipfile from sources
+ """)
+ if args is None:
+ args = sys.argv[1:]
+
+ if not args or args[0] not in ('-l', '-c', '-e', '-t'):
+ print USAGE
+ sys.exit(1)
+
+ if args[0] == '-l':
+ if len(args) != 2:
+ print USAGE
+ sys.exit(1)
+ zf = ZipFile(args[1], 'r')
+ zf.printdir()
+ zf.close()
+
+ elif args[0] == '-t':
+ if len(args) != 2:
+ print USAGE
+ sys.exit(1)
+ zf = ZipFile(args[1], 'r')
+ zf.testzip()
+ print "Done testing"
+
+ elif args[0] == '-e':
+ if len(args) != 3:
+ print USAGE
+ sys.exit(1)
+
+ zf = ZipFile(args[1], 'r')
+ out = args[2]
+ for path in zf.namelist():
+ if path.startswith('./'):
+ tgt = os.path.join(out, path[2:])
+ else:
+ tgt = os.path.join(out, path)
+
+ tgtdir = os.path.dirname(tgt)
+ if not os.path.exists(tgtdir):
+ os.makedirs(tgtdir)
+ with open(tgt, 'wb') as fp:
+ fp.write(zf.read(path))
+ zf.close()
+
+ elif args[0] == '-c':
+ if len(args) < 3:
+ print USAGE
+ sys.exit(1)
+
+ def addToZip(zf, path, zippath):
+ if os.path.isfile(path):
+ zf.write(path, zippath, ZIP_DEFLATED)
+ elif os.path.isdir(path):
+ for nm in os.listdir(path):
+ addToZip(zf,
+ os.path.join(path, nm), os.path.join(zippath, nm))
+ # else: ignore
+
+ zf = ZipFile(args[1], 'w', allowZip64=True)
+ for src in args[2:]:
+ addToZip(zf, src, os.path.basename(src))
+
+ zf.close()
+
+if __name__ == "__main__":
+ main()
diff --git a/DeDRM_calibre_plugin/DeDRM_plugin/zipfix.py b/DeDRM_calibre_plugin/DeDRM_plugin/zipfix.py
index b54db80..8ddfae3 100644
--- a/DeDRM_calibre_plugin/DeDRM_plugin/zipfix.py
+++ b/DeDRM_calibre_plugin/DeDRM_plugin/zipfix.py
@@ -1,60 +1,188 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
-from __future__ import with_statement
+# zipfix.py, version 1.1
+# Copyright © 2010-2013 by some_updates, DiapDealer and Apprentice Alf
+
+# Released under the terms of the GNU General Public Licence, version 3
+# <http://www.gnu.org/licenses/>
+
+# Revision history:
+# 1.0 - Initial release
+# 1.1 - Updated to handle zip file metadata correctly
+
+"""
+Re-write zip (or ePub) fixing problems with file names (and mimetype entry).
+"""
__license__ = 'GPL v3'
+__version__ = "1.1"
+
+import sys
+import zlib
+import zipfilerugged
+import os
+import os.path
+import getopt
+from struct import unpack
+
+
+_FILENAME_LEN_OFFSET = 26
+_EXTRA_LEN_OFFSET = 28
+_FILENAME_OFFSET = 30
+_MAX_SIZE = 64 * 1024
+_MIMETYPE = 'application/epub+zip'
+
+class ZipInfo(zipfilerugged.ZipInfo):
+ def __init__(self, *args, **kwargs):
+ if 'compress_type' in kwargs:
+ compress_type = kwargs.pop('compress_type')
+ super(ZipInfo, self).__init__(*args, **kwargs)
+ self.compress_type = compress_type
+
+class fixZip:
+ def __init__(self, zinput, zoutput):
+ self.ztype = 'zip'
+ if zinput.lower().find('.epub') >= 0 :
+ self.ztype = 'epub'
+ self.inzip = zipfilerugged.ZipFile(zinput,'r')
+ self.outzip = zipfilerugged.ZipFile(zoutput,'w')
+ # open the input zip for reading only as a raw file
+ self.bzf = file(zinput,'rb')
+
+ def getlocalname(self, zi):
+ local_header_offset = zi.header_offset
+ self.bzf.seek(local_header_offset + _FILENAME_LEN_OFFSET)
+ leninfo = self.bzf.read(2)
+ local_name_length, = unpack('<H', leninfo)
+ self.bzf.seek(local_header_offset + _FILENAME_OFFSET)
+ local_name = self.bzf.read(local_name_length)
+ return local_name
+
+ def uncompress(self, cmpdata):
+ dc = zlib.decompressobj(-15)
+ data = ''
+ while len(cmpdata) > 0:
+ if len(cmpdata) > _MAX_SIZE :
+ newdata = cmpdata[0:_MAX_SIZE]
+ cmpdata = cmpdata[_MAX_SIZE:]
+ else:
+ newdata = cmpdata
+ cmpdata = ''
+ newdata = dc.decompress(newdata)
+ unprocessed = dc.unconsumed_tail
+ if len(unprocessed) == 0:
+ newdata += dc.flush()
+ data += newdata
+ cmpdata += unprocessed
+ unprocessed = ''
+ return data
+
+ def getfiledata(self, zi):
+ # get file name length and exta data length to find start of file data
+ local_header_offset = zi.header_offset
+
+ self.bzf.seek(local_header_offset + _FILENAME_LEN_OFFSET)
+ leninfo = self.bzf.read(2)
+ local_name_length, = unpack('<H', leninfo)
-# Standard Python modules.
-import os, sys, re, hashlib
-from calibre_plugins.dedrm.__init__ import PLUGIN_NAME, PLUGIN_VERSION
+ self.bzf.seek(local_header_offset + _EXTRA_LEN_OFFSET)
+ exinfo = self.bzf.read(2)
+ extra_field_length, = unpack('<H', exinfo)
-def WineGetKeys(scriptpath, extension, wineprefix=""):
- import subprocess
- from subprocess import Popen, PIPE, STDOUT
+ self.bzf.seek(local_header_offset + _FILENAME_OFFSET + local_name_length + extra_field_length)
+ data = None
- import subasyncio
- from subasyncio import Process
+ # if not compressed we are good to go
+ if zi.compress_type == zipfilerugged.ZIP_STORED:
+ data = self.bzf.read(zi.file_size)
- if extension == u".k4i":
- import json
+ # if compressed we must decompress it using zlib
+ if zi.compress_type == zipfilerugged.ZIP_DEFLATED:
+ cmpdata = self.bzf.read(zi.compress_size)
+ data = self.uncompress(cmpdata)
- basepath, script = os.path.split(scriptpath)
- print u"{0} v{1}: Running {2} under Wine".format(PLUGIN_NAME, PLUGIN_VERSION, script)
+ return data
- outdirpath = os.path.join(basepath, u"winekeysdir")
- if not os.path.exists(outdirpath):
- os.makedirs(outdirpath)
- if wineprefix != "" and os.path.exists(wineprefix):
- cmdline = u"WINEPREFIX=\"{2}\" wine python.exe \"{0}\" \"{1}\"".format(scriptpath,outdirpath,wineprefix)
- else:
- cmdline = u"wine python.exe \"{0}\" \"{1}\"".format(scriptpath,outdirpath)
- print u"{0} v{1}: Command line: “{2}”".format(PLUGIN_NAME, PLUGIN_VERSION, cmdline)
+ def fix(self):
+ # get the zipinfo for each member of the input archive
+ # and copy member over to output archive
+ # if problems exist with local vs central filename, fix them
+
+ # if epub write mimetype file first, with no compression
+ if self.ztype == 'epub':
+ # first get a ZipInfo with current time and no compression
+ mimeinfo = ZipInfo('mimetype',compress_type=zipfilerugged.ZIP_STORED)
+ mimeinfo.internal_attr = 1 # text file
+ try:
+ # if the mimetype is present, get its info, including time-stamp
+ oldmimeinfo = self.inzip.getinfo('mimetype')
+ # copy across useful fields
+ mimeinfo.date_time = oldmimeinfo.date_time
+ mimeinfo.comment = oldmimeinfo.comment
+ mimeinfo.extra = oldmimeinfo.extra
+ mimeinfo.internal_attr = oldmimeinfo.internal_attr
+ mimeinfo.external_attr = oldmimeinfo.external_attr
+ mimeinfo.create_system = oldmimeinfo.create_system
+ except:
+ pass
+ self.outzip.writestr(mimeinfo, _MIMETYPE)
+
+ # write the rest of the files
+ for zinfo in self.inzip.infolist():
+ if zinfo.filename != "mimetype" or self.ztype != 'epub':
+ data = None
+ try:
+ data = self.inzip.read(zinfo.filename)
+ except zipfilerugged.BadZipfile or zipfilerugged.error:
+ local_name = self.getlocalname(zinfo)
+ data = self.getfiledata(zinfo)
+ zinfo.filename = local_name
+
+ # create new ZipInfo with only the useful attributes from the old info
+ nzinfo = ZipInfo(zinfo.filename, zinfo.date_time, compress_type=zinfo.compress_type)
+ nzinfo.comment=zinfo.comment
+ nzinfo.extra=zinfo.extra
+ nzinfo.internal_attr=zinfo.internal_attr
+ nzinfo.external_attr=zinfo.external_attr
+ nzinfo.create_system=zinfo.create_system
+ self.outzip.writestr(nzinfo,data)
+
+ self.bzf.close()
+ self.inzip.close()
+ self.outzip.close()
+
+
+def usage():
+ print """usage: zipfix.py inputzip outputzip
+ inputzip is the source zipfile to fix
+ outputzip is the fixed zip archive
+ """
+
+
+def repairBook(infile, outfile):
+ if not os.path.exists(infile):
+ print "Error: Input Zip File does not exist"
+ return 1
try:
- cmdline = cmdline.encode(sys.getfilesystemencoding())
- p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=sys.stdout, stderr=STDOUT, close_fds=False)
- result = p2.wait("wait")
+ fr = fixZip(infile, outfile)
+ fr.fix()
+ return 0
except Exception, e:
- print u"{0} v{1}: Wine subprocess call error: {2}".format(PLUGIN_NAME, PLUGIN_VERSION, e.args[0])
- return []
-
- winekeys = []
- # get any files with extension in the output dir
- files = [f for f in os.listdir(outdirpath) if f.endswith(extension)]
- for filename in files:
- try:
- fpath = os.path.join(outdirpath, filename)
- with open(fpath, 'rb') as keyfile:
- if extension == u".k4i":
- new_key_value = json.loads(keyfile.read())
- else:
- new_key_value = keyfile.read()
- winekeys.append(new_key_value)
- except:
- print u"{0} v{1}: Error loading file {2}".format(PLUGIN_NAME, PLUGIN_VERSION, filename)
- traceback.print_exc()
- os.remove(fpath)
- print u"{0} v{1}: Found and decrypted {2} {3}".format(PLUGIN_NAME, PLUGIN_VERSION, len(winekeys), u"key file" if len(winekeys) == 1 else u"key files")
- return winekeys
+ print "Error Occurred ", e
+ return 2
+
+
+def main(argv=sys.argv):
+ if len(argv)!=3:
+ usage()
+ return 1
+ infile = argv[1]
+ outfile = argv[2]
+ return repairBook(infile, outfile)
+
+
+if __name__ == '__main__' :
+ sys.exit(main())