From 737d5e7f1e7e0763d9b5e42e3d0effb9c1dfc0ea Mon Sep 17 00:00:00 2001 From: NoDRM Date: Sun, 3 Dec 2023 10:45:09 +0100 Subject: Bunch of updates for the FileOpen script --- CHANGELOG.md | 1 + DeDRM_plugin/ineptpdf.py | 4 +- .../ineptpdf_8.4.51.pyw | 3163 -------------------- .../ineptpdf_fileopen.pyw | 3067 +++++++++++++++++++ 4 files changed, 3070 insertions(+), 3165 deletions(-) delete mode 100644 Other_Tools/Tetrachroma_FileOpen_ineptpdf/ineptpdf_8.4.51.pyw create mode 100644 Other_Tools/Tetrachroma_FileOpen_ineptpdf/ineptpdf_fileopen.pyw diff --git a/CHANGELOG.md b/CHANGELOG.md index bf8b806..c39fc95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -104,4 +104,5 @@ This is v10.0.9, a release candidate for v10.1.0. I don't expect there to be maj - Fix bugs in kgenpids.py, alfcrypto.py, mobidedrm.py and kindlekey.py that caused it to fail on Python 2 (#380). - Fix some bugs (Python 2 and Python 3) in erdr2pml.py (untested). - Fix file lock bug in androidkindlekey.py on Windows with Calibre >= 7 (untested). +- A bunch of updates to the external FileOpen ineptpdf script, might fix #442 (untested). 
diff --git a/DeDRM_plugin/ineptpdf.py b/DeDRM_plugin/ineptpdf.py index 6d53b95..15ea7fc 100755 --- a/DeDRM_plugin/ineptpdf.py +++ b/DeDRM_plugin/ineptpdf.py @@ -834,7 +834,7 @@ def num_value(x): x = resolve1(x) if not (isinstance(x, int) or isinstance(x, Decimal)): if STRICT: - raise PDFTypeError('Int or Float required: %r' % x) + raise PDFTypeError('Int or Decimal required: %r' % x) return 0 return x @@ -2042,7 +2042,7 @@ class PDFParser(PSStackParser): except PDFNoValidXRef: # fallback self.seek(0) - pat = re.compile(b'^(\\d+)\\s+(\\d+)\\s+obj\\b') + pat = re.compile(rb'^(\d+)\s+(\d+)\s+obj\b') offsets = {} xref = PDFXRef() while 1: diff --git a/Other_Tools/Tetrachroma_FileOpen_ineptpdf/ineptpdf_8.4.51.pyw b/Other_Tools/Tetrachroma_FileOpen_ineptpdf/ineptpdf_8.4.51.pyw deleted file mode 100644 index 4c301e6..0000000 --- a/Other_Tools/Tetrachroma_FileOpen_ineptpdf/ineptpdf_8.4.51.pyw +++ /dev/null @@ -1,3163 +0,0 @@ -#! /usr/bin/python - -# ineptpdf8.4.51.pyw -# ineptpdf, version 8.4.51 - -# To run this program install Python 2.7 from http://www.python.org/download/ -# -# PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto -# -# and PyWin Extension (Win32API module) from -# http://sourceforge.net/projects/pywin32/files/ -# -# Make sure to install the dedicated versions for Python 2.7. -# -# It's recommended to use the 32-Bit Python Windows versions (even with a 64-bit -# Windows system). -# -# Save this script file as -# ineptpdf8.4.51.pyw and double-click on it to run it. - -# Revision history: -# 1 - Initial release -# 2 - Improved determination of key-generation algorithm -# 3 - Correctly handle PDF >=1.5 cross-reference streams -# 4 - Removal of ciando's personal ID (anon) -# 5 - removing small bug with V3 ebooks (anon) -# 6 - changed to adeptkey4.der format for 1.7.2 support (anon) -# 6.1 - backward compatibility for 1.7.1 and old adeptkey.der (anon) -# 7 - Get cross reference streams and object streams working for input. 
-# Not yet supported on output but this only effects file size, -# not functionality. (anon2) -# 7.1 - Correct a problem when an old trailer is not followed by startxref (anon2) -# 7.2 - Correct malformed Mac OS resource forks for Stanza -# - Support for cross ref streams on output (decreases file size) (anon2) -# 7.3 - Correct bug in trailer with cross ref stream that caused the error (anon2) -# "The root object is missing or invalid" in Adobe Reader. -# 7.4 - Force all generation numbers in output file to be 0, like in v6. -# Fallback code for wrong xref improved (search till last trailer -# instead of first) (anon2) -# 8 - fileopen user machine identifier support (Tetrachroma) -# 8.1 - fileopen user cookies support (Tetrachroma) -# 8.2 - fileopen user name/password support (Tetrachroma) -# 8.3 - fileopen session cookie support (Tetrachroma) -# 8.3.1 - fix for the "specified key file does not exist" error (Tetrachroma) -# 8.3.2 - improved server result parsing (Tetrachroma) -# 8.4 - Ident4D and encrypted Uuid support (Tetrachroma) -# 8.4.1 - improved MAC address processing (Tetrachroma) -# 8.4.2 - FowP3Uuid fallback file processing (Tetrachroma) -# 8.4.3 - improved user/password pdf file detection (Tetrachroma) -# 8.4.4 - small bugfix (Tetrachroma) -# 8.4.5 - improved cookie host searching (Tetrachroma) -# 8.4.6 - STRICT parsing disabled (non-standard pdf processing) (Tetrachroma) -# 8.4.7 - UTF-8 input file conversion (Tetrachroma) -# 8.4.8 - fix for more rare utf8 problems (Tetrachroma) -# 8.4.9 - solution for utf8 in comination with -# ident4id method (Tetrachroma) -# 8.4.10 - line feed processing, non c system drive patch, nrbook support (Tetrachroma) -# 8.4.11 - alternative ident4id calculation (Tetrachroma) -# 8.4.12 - fix for capital username characters and -# other unusual user login names (Tetrachroma & ZeroPoint) -# 8.4.13 - small bug fixes (Tetrachroma) -# 8.4.14 - fix for non-standard-conform fileopen pdfs (Tetrachroma) -# 8.4.15 - 'bad file 
descriptor'-fix (Tetrachroma) -# 8.4.16 - improves user/pass detection (Tetrachroma) -# 8.4.17 - fix for several '=' chars in a DPRM entity (Tetrachroma) -# 8.4.18 - follow up bug fix for the DPRM problem, -# more readable error messages (Tetrachroma) -# 8.4.19 - 2nd fix for 'bad file descriptor' problem (Tetrachroma) -# 8.4.20 - follow up patch (Tetrachroma) -# 8.4.21 - 3rd patch for 'bad file descriptor' (Tetrachroma) -# 8.4.22 - disable prints for exception prevention (Tetrachroma) -# 8.4.23 - check for additional security attributes (Tetrachroma) -# 8.4.24 - improved cookie session support (Tetrachroma) -# 8.4.25 - more compatibility with unicode files (Tetrachroma) -# 8.4.26 - automated session/user cookie request function (works -# only with Firefox 3.x+) (Tetrachroma) -# 8.4.27 - user/password fallback -# 8.4.28 - AES decryption, improved misconfigured pdf handling, -# limited experimental APS support (Tetrachroma & Neisklar) -# 8.4.29 - backport for bad formatted rc4 encrypted pdfs (Tetrachroma) -# 8.4.30 - extended authorization attributes support (Tetrachroma) -# 8.4.31 - improved session cookie and better server response error -# handling (Tetrachroma) -# 8.4.33 - small cookie optimizations (Tetrachroma) -# 8.4.33 - debug output option (Tetrachroma) -# 8.4.34 - better user/password management -# handles the 'AskUnp' response) (Tetrachroma) -# 8.4.35 - special handling for non-standard systems (Tetrachroma) -# 8.4.36 - previous machine/disk handling [PrevMach/PrevDisk] (Tetrachroma) -# 8.4.36 - FOPN_flock support (Tetrachroma) -# 8.4.37 - patch for unicode paths/filenames (Tetrachroma) -# 8.4.38 - small fix for user/password dialog (Tetrachroma) -# 8.4.39 - sophisticated request mode differentiation, forced -# uuid calculation (Tetrachroma) -# 8.4.40 - fix for non standard server responses (Tetrachroma) -# 8.4.41 - improved user/password request windows, -# better server response tolerance (Tetrachroma) -# 8.4.42 - improved nl/cr server response parsing 
(Tetrachroma) -# 8.4.43 - fix for user names longer than 13 characters and special -# uuid encryption (Tetrachroma) -# 8.4.44 - another fix for ident4d problem (Tetrachroma) -# 8.4.45 - 2nd fix for ident4d problem (Tetrachroma) -# 8.4.46 - script cleanup and optimizations (Tetrachroma) -# 8.4.47 - script identification change to Adobe Reader (Tetrachroma) -# 8.4.48 - improved tolerance for false file/registry entries (Tetrachroma) -# 8.4.49 - improved username encryption (Tetrachroma) -# 8.4.50 - improved (experimental) APS support (Tetrachroma & Neisklar) -# 8.4.51 - automatic APS offline key retrieval (works only for -# Onleihe right now) (80ka80 & Tetrachroma) - -""" -Decrypts Adobe ADEPT-encrypted and Fileopen PDF files. -""" - -from __future__ import with_statement - -__license__ = 'GPL v3' - -import sys -import os -import re -import zlib -import struct -import hashlib -from itertools import chain, islice -import xml.etree.ElementTree as etree -import Tkinter -import Tkconstants -import tkFileDialog -import tkMessageBox -# added for fileopen support -import urllib -import urlparse -import time -import socket -import string -import uuid -import subprocess -import time -import getpass -from ctypes import * -import traceback -import inspect -import tempfile -import sqlite3 -import httplib -try: - from Crypto.Cipher import ARC4 - # needed for newer pdfs - from Crypto.Cipher import AES - from Crypto.Hash import SHA256 - from Crypto.PublicKey import RSA - -except ImportError: - ARC4 = None - RSA = None -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - -class ADEPTError(Exception): - pass - -# global variable (needed for fileopen and password decryption) -INPUTFILEPATH = '' -KEYFILEPATH = '' -PASSWORD = '' -DEBUG_MODE = False -IVERSION = '8.4.51' - -# Do we generate cross reference streams on output? 
-# 0 = never -# 1 = only if present in input -# 2 = always - -GEN_XREF_STM = 1 - -# This is the value for the current document -gen_xref_stm = False # will be set in PDFSerializer - -### -### ASN.1 parsing code from tlslite - -def bytesToNumber(bytes): - total = 0L - for byte in bytes: - total = (total << 8) + byte - return total - -class ASN1Error(Exception): - pass - -class ASN1Parser(object): - class Parser(object): - def __init__(self, bytes): - self.bytes = bytes - self.index = 0 - - def get(self, length): - if self.index + length > len(self.bytes): - raise ASN1Error("Error decoding ASN.1") - x = 0 - for count in range(length): - x <<= 8 - x |= self.bytes[self.index] - self.index += 1 - return x - - def getFixBytes(self, lengthBytes): - bytes = self.bytes[self.index : self.index+lengthBytes] - self.index += lengthBytes - return bytes - - def getVarBytes(self, lengthLength): - lengthBytes = self.get(lengthLength) - return self.getFixBytes(lengthBytes) - - def getFixList(self, length, lengthList): - l = [0] * lengthList - for x in range(lengthList): - l[x] = self.get(length) - return l - - def getVarList(self, length, lengthLength): - lengthList = self.get(lengthLength) - if lengthList % length != 0: - raise ASN1Error("Error decoding ASN.1") - lengthList = int(lengthList/length) - l = [0] * lengthList - for x in range(lengthList): - l[x] = self.get(length) - return l - - def startLengthCheck(self, lengthLength): - self.lengthCheck = self.get(lengthLength) - self.indexCheck = self.index - - def setLengthCheck(self, length): - self.lengthCheck = length - self.indexCheck = self.index - - def stopLengthCheck(self): - if (self.index - self.indexCheck) != self.lengthCheck: - raise ASN1Error("Error decoding ASN.1") - - def atLengthCheck(self): - if (self.index - self.indexCheck) < self.lengthCheck: - return False - elif (self.index - self.indexCheck) == self.lengthCheck: - return True - else: - raise ASN1Error("Error decoding ASN.1") - - def __init__(self, bytes): - p 
= self.Parser(bytes) - p.get(1) - self.length = self._getASN1Length(p) - self.value = p.getFixBytes(self.length) - - def getChild(self, which): - p = self.Parser(self.value) - for x in range(which+1): - markIndex = p.index - p.get(1) - length = self._getASN1Length(p) - p.getFixBytes(length) - return ASN1Parser(p.bytes[markIndex:p.index]) - - def _getASN1Length(self, p): - firstLength = p.get(1) - if firstLength<=127: - return firstLength - else: - lengthLength = firstLength & 0x7F - return p.get(lengthLength) - -### -### PDF parsing routines from pdfminer, with changes for EBX_HANDLER - -## Utilities -## -def choplist(n, seq): - '''Groups every n elements of the list.''' - r = [] - for x in seq: - r.append(x) - if len(r) == n: - yield tuple(r) - r = [] - return - -def nunpack(s, default=0): - '''Unpacks up to 4 bytes big endian.''' - l = len(s) - if not l: - return default - elif l == 1: - return ord(s) - elif l == 2: - return struct.unpack('>H', s)[0] - elif l == 3: - return struct.unpack('>L', '\x00'+s)[0] - elif l == 4: - return struct.unpack('>L', s)[0] - else: - return TypeError('invalid length: %d' % l) - - -STRICT = 0 - - -## PS Exceptions -## -class PSException(Exception): pass -class PSEOF(PSException): pass -class PSSyntaxError(PSException): pass -class PSTypeError(PSException): pass -class PSValueError(PSException): pass - - -## Basic PostScript Types -## - -# PSLiteral -class PSObject(object): pass - -class PSLiteral(PSObject): - ''' - PS literals (e.g. "/Name"). - Caution: Never create these objects directly. - Use PSLiteralTable.intern() instead. - ''' - def __init__(self, name): - self.name = name - return - - def __repr__(self): - name = [] - for char in self.name: - if not char.isalnum(): - char = '#%02x' % ord(char) - name.append(char) - return '/%s' % ''.join(name) - -# PSKeyword -class PSKeyword(PSObject): - ''' - PS keywords (e.g. "showpage"). - Caution: Never create these objects directly. - Use PSKeywordTable.intern() instead. 
- ''' - def __init__(self, name): - self.name = name - return - - def __repr__(self): - return self.name - -# PSSymbolTable -class PSSymbolTable(object): - - ''' - Symbol table that stores PSLiteral or PSKeyword. - ''' - - def __init__(self, classe): - self.dic = {} - self.classe = classe - return - - def intern(self, name): - if name in self.dic: - lit = self.dic[name] - else: - lit = self.classe(name) - self.dic[name] = lit - return lit - -PSLiteralTable = PSSymbolTable(PSLiteral) -PSKeywordTable = PSSymbolTable(PSKeyword) -LIT = PSLiteralTable.intern -KWD = PSKeywordTable.intern -KEYWORD_BRACE_BEGIN = KWD('{') -KEYWORD_BRACE_END = KWD('}') -KEYWORD_ARRAY_BEGIN = KWD('[') -KEYWORD_ARRAY_END = KWD(']') -KEYWORD_DICT_BEGIN = KWD('<<') -KEYWORD_DICT_END = KWD('>>') - - -def literal_name(x): - if not isinstance(x, PSLiteral): - if STRICT: - raise PSTypeError('Literal required: %r' % x) - else: - return str(x) - return x.name - -def keyword_name(x): - if not isinstance(x, PSKeyword): - if STRICT: - raise PSTypeError('Keyword required: %r' % x) - else: - return str(x) - return x.name - - -## PSBaseParser -## -EOL = re.compile(r'[\r\n]') -SPC = re.compile(r'\s') -NONSPC = re.compile(r'\S') -HEX = re.compile(r'[0-9a-fA-F]') -END_LITERAL = re.compile(r'[#/%\[\]()<>{}\s]') -END_HEX_STRING = re.compile(r'[^\s0-9a-fA-F]') -HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.') -END_NUMBER = re.compile(r'[^0-9]') -END_KEYWORD = re.compile(r'[#/%\[\]()<>{}\s]') -END_STRING = re.compile(r'[()\134]') -OCT_STRING = re.compile(r'[0-7]') -ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 } - -class PSBaseParser(object): - - ''' - Most basic PostScript parser that performs only basic tokenization. 
- ''' - BUFSIZ = 4096 - - def __init__(self, fp): - self.fp = fp - self.seek(0) - return - - def __repr__(self): - return '' % (self.fp, self.bufpos) - - def flush(self): - return - - def close(self): - self.flush() - return - - def tell(self): - return self.bufpos+self.charpos - - def poll(self, pos=None, n=80): - pos0 = self.fp.tell() - if not pos: - pos = self.bufpos+self.charpos - self.fp.seek(pos) - ##print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n)) - self.fp.seek(pos0) - return - - def seek(self, pos): - ''' - Seeks the parser to the given position. - ''' - self.fp.seek(pos) - # reset the status for nextline() - self.bufpos = pos - self.buf = '' - self.charpos = 0 - # reset the status for nexttoken() - self.parse1 = self.parse_main - self.tokens = [] - return - - def fillbuf(self): - if self.charpos < len(self.buf): return - # fetch next chunk. - self.bufpos = self.fp.tell() - self.buf = self.fp.read(self.BUFSIZ) - if not self.buf: - raise PSEOF('Unexpected EOF') - self.charpos = 0 - return - - def parse_main(self, s, i): - m = NONSPC.search(s, i) - if not m: - return (self.parse_main, len(s)) - j = m.start(0) - c = s[j] - self.tokenstart = self.bufpos+j - if c == '%': - self.token = '%' - return (self.parse_comment, j+1) - if c == '/': - self.token = '' - return (self.parse_literal, j+1) - if c in '-+' or c.isdigit(): - self.token = c - return (self.parse_number, j+1) - if c == '.': - self.token = c - return (self.parse_float, j+1) - if c.isalpha(): - self.token = c - return (self.parse_keyword, j+1) - if c == '(': - self.token = '' - self.paren = 1 - return (self.parse_string, j+1) - if c == '<': - self.token = '' - return (self.parse_wopen, j+1) - if c == '>': - self.token = '' - return (self.parse_wclose, j+1) - self.add_token(KWD(c)) - return (self.parse_main, j+1) - - def add_token(self, obj): - self.tokens.append((self.tokenstart, obj)) - return - - def parse_comment(self, s, i): - m = EOL.search(s, i) - if not m: - self.token += s[i:] - 
return (self.parse_comment, len(s)) - j = m.start(0) - self.token += s[i:j] - # We ignore comments. - #self.tokens.append(self.token) - return (self.parse_main, j) - - def parse_literal(self, s, i): - m = END_LITERAL.search(s, i) - if not m: - self.token += s[i:] - return (self.parse_literal, len(s)) - j = m.start(0) - self.token += s[i:j] - c = s[j] - if c == '#': - self.hex = '' - return (self.parse_literal_hex, j+1) - self.add_token(LIT(self.token)) - return (self.parse_main, j) - - def parse_literal_hex(self, s, i): - c = s[i] - if HEX.match(c) and len(self.hex) < 2: - self.hex += c - return (self.parse_literal_hex, i+1) - if self.hex: - self.token += chr(int(self.hex, 16)) - return (self.parse_literal, i) - - def parse_number(self, s, i): - m = END_NUMBER.search(s, i) - if not m: - self.token += s[i:] - return (self.parse_number, len(s)) - j = m.start(0) - self.token += s[i:j] - c = s[j] - if c == '.': - self.token += c - return (self.parse_float, j+1) - try: - self.add_token(int(self.token)) - except ValueError: - pass - return (self.parse_main, j) - def parse_float(self, s, i): - m = END_NUMBER.search(s, i) - if not m: - self.token += s[i:] - return (self.parse_float, len(s)) - j = m.start(0) - self.token += s[i:j] - self.add_token(float(self.token)) - return (self.parse_main, j) - - def parse_keyword(self, s, i): - m = END_KEYWORD.search(s, i) - if not m: - self.token += s[i:] - return (self.parse_keyword, len(s)) - j = m.start(0) - self.token += s[i:j] - if self.token == 'true': - token = True - elif self.token == 'false': - token = False - else: - token = KWD(self.token) - self.add_token(token) - return (self.parse_main, j) - - def parse_string(self, s, i): - m = END_STRING.search(s, i) - if not m: - self.token += s[i:] - return (self.parse_string, len(s)) - j = m.start(0) - self.token += s[i:j] - c = s[j] - if c == '\\': - self.oct = '' - return (self.parse_string_1, j+1) - if c == '(': - self.paren += 1 - self.token += c - return (self.parse_string, 
j+1) - if c == ')': - self.paren -= 1 - if self.paren: - self.token += c - return (self.parse_string, j+1) - self.add_token(self.token) - return (self.parse_main, j+1) - def parse_string_1(self, s, i): - c = s[i] - if OCT_STRING.match(c) and len(self.oct) < 3: - self.oct += c - return (self.parse_string_1, i+1) - if self.oct: - self.token += chr(int(self.oct, 8)) - return (self.parse_string, i) - if c in ESC_STRING: - self.token += chr(ESC_STRING[c]) - return (self.parse_string, i+1) - - def parse_wopen(self, s, i): - c = s[i] - if c.isspace() or HEX.match(c): - return (self.parse_hexstring, i) - if c == '<': - self.add_token(KEYWORD_DICT_BEGIN) - i += 1 - return (self.parse_main, i) - - def parse_wclose(self, s, i): - c = s[i] - if c == '>': - self.add_token(KEYWORD_DICT_END) - i += 1 - return (self.parse_main, i) - - def parse_hexstring(self, s, i): - m = END_HEX_STRING.search(s, i) - if not m: - self.token += s[i:] - return (self.parse_hexstring, len(s)) - j = m.start(0) - self.token += s[i:j] - token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)), - SPC.sub('', self.token)) - self.add_token(token) - return (self.parse_main, j) - - def nexttoken(self): - while not self.tokens: - self.fillbuf() - (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos) - token = self.tokens.pop(0) - return token - - def nextline(self): - ''' - Fetches a next line that ends either with \\r or \\n. - ''' - linebuf = '' - linepos = self.bufpos + self.charpos - eol = False - while 1: - self.fillbuf() - if eol: - c = self.buf[self.charpos] - # handle '\r\n' - if c == '\n': - linebuf += c - self.charpos += 1 - break - m = EOL.search(self.buf, self.charpos) - if m: - linebuf += self.buf[self.charpos:m.end(0)] - self.charpos = m.end(0) - if linebuf[-1] == '\r': - eol = True - else: - break - else: - linebuf += self.buf[self.charpos:] - self.charpos = len(self.buf) - return (linepos, linebuf) - - def revreadlines(self): - ''' - Fetches a next line backword. 
This is used to locate - the trailers at the end of a file. - ''' - self.fp.seek(0, 2) - pos = self.fp.tell() - buf = '' - while 0 < pos: - prevpos = pos - pos = max(0, pos-self.BUFSIZ) - self.fp.seek(pos) - s = self.fp.read(prevpos-pos) - if not s: break - while 1: - n = max(s.rfind('\r'), s.rfind('\n')) - if n == -1: - buf = s + buf - break - yield s[n:]+buf - s = s[:n] - buf = '' - return - - -## PSStackParser -## -class PSStackParser(PSBaseParser): - - def __init__(self, fp): - PSBaseParser.__init__(self, fp) - self.reset() - return - - def reset(self): - self.context = [] - self.curtype = None - self.curstack = [] - self.results = [] - return - - def seek(self, pos): - PSBaseParser.seek(self, pos) - self.reset() - return - - def push(self, *objs): - self.curstack.extend(objs) - return - def pop(self, n): - objs = self.curstack[-n:] - self.curstack[-n:] = [] - return objs - def popall(self): - objs = self.curstack - self.curstack = [] - return objs - def add_results(self, *objs): - self.results.extend(objs) - return - - def start_type(self, pos, type): - self.context.append((pos, self.curtype, self.curstack)) - (self.curtype, self.curstack) = (type, []) - return - def end_type(self, type): - if self.curtype != type: - raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type)) - objs = [ obj for (_,obj) in self.curstack ] - (pos, self.curtype, self.curstack) = self.context.pop() - return (pos, objs) - - def do_keyword(self, pos, token): - return - - def nextobject(self, direct=False): - ''' - Yields a list of objects: keywords, literals, strings, - numbers, arrays and dictionaries. Arrays and dictionaries - are represented as Python sequence and dictionaries. 
- ''' - while not self.results: - (pos, token) = self.nexttoken() - ##print (pos,token), (self.curtype, self.curstack) - if (isinstance(token, int) or - isinstance(token, float) or - isinstance(token, bool) or - isinstance(token, str) or - isinstance(token, PSLiteral)): - # normal token - self.push((pos, token)) - elif token == KEYWORD_ARRAY_BEGIN: - # begin array - self.start_type(pos, 'a') - elif token == KEYWORD_ARRAY_END: - # end array - try: - self.push(self.end_type('a')) - except PSTypeError: - if STRICT: raise - elif token == KEYWORD_DICT_BEGIN: - # begin dictionary - self.start_type(pos, 'd') - elif token == KEYWORD_DICT_END: - # end dictionary - try: - (pos, objs) = self.end_type('d') - if len(objs) % 2 != 0: - raise PSSyntaxError( - 'Invalid dictionary construct: %r' % objs) - d = dict((literal_name(k), v) \ - for (k,v) in choplist(2, objs)) - self.push((pos, d)) - except PSTypeError: - if STRICT: raise - else: - self.do_keyword(pos, token) - if self.context: - continue - else: - if direct: - return self.pop(1)[0] - self.flush() - obj = self.results.pop(0) - return obj - - -LITERAL_CRYPT = PSLiteralTable.intern('Crypt') -LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl')) -LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW')) -LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85')) - - -## PDF Objects -## -class PDFObject(PSObject): pass - -class PDFException(PSException): pass -class PDFTypeError(PDFException): pass -class PDFValueError(PDFException): pass -class PDFNotImplementedError(PSException): pass - - -## PDFObjRef -## -class PDFObjRef(PDFObject): - - def __init__(self, doc, objid, genno): - if objid == 0: - if STRICT: - raise PDFValueError('PDF object id cannot be 0.') - self.doc = doc - self.objid = objid - self.genno = genno - return - - def __repr__(self): - return '' % (self.objid, self.genno) - - def resolve(self): - return 
self.doc.getobj(self.objid) - - -# resolve -def resolve1(x): - ''' - Resolve an object. If this is an array or dictionary, - it may still contains some indirect objects inside. - ''' - while isinstance(x, PDFObjRef): - x = x.resolve() - return x - -def resolve_all(x): - ''' - Recursively resolve X and all the internals. - Make sure there is no indirect reference within the nested object. - This procedure might be slow. - ''' - while isinstance(x, PDFObjRef): - x = x.resolve() - if isinstance(x, list): - x = [ resolve_all(v) for v in x ] - elif isinstance(x, dict): - for (k,v) in x.iteritems(): - x[k] = resolve_all(v) - return x - -def decipher_all(decipher, objid, genno, x): - ''' - Recursively decipher X. - ''' - if isinstance(x, str): - return decipher(objid, genno, x) - decf = lambda v: decipher_all(decipher, objid, genno, v) - if isinstance(x, list): - x = [decf(v) for v in x] - elif isinstance(x, dict): - x = dict((k, decf(v)) for (k, v) in x.iteritems()) - return x - - -# Type cheking -def int_value(x): - x = resolve1(x) - if not isinstance(x, int): - if STRICT: - raise PDFTypeError('Integer required: %r' % x) - return 0 - return x - -def float_value(x): - x = resolve1(x) - if not isinstance(x, float): - if STRICT: - raise PDFTypeError('Float required: %r' % x) - return 0.0 - return x - -def num_value(x): - x = resolve1(x) - if not (isinstance(x, int) or isinstance(x, float)): - if STRICT: - raise PDFTypeError('Int or Float required: %r' % x) - return 0 - return x - -def str_value(x): - x = resolve1(x) - if not isinstance(x, str): - if STRICT: - raise PDFTypeError('String required: %r' % x) - return '' - return x - -def list_value(x): - x = resolve1(x) - if not (isinstance(x, list) or isinstance(x, tuple)): - if STRICT: - raise PDFTypeError('List required: %r' % x) - return [] - return x - -def dict_value(x): - x = resolve1(x) - if not isinstance(x, dict): - if STRICT: - raise PDFTypeError('Dict required: %r' % x) - return {} - return x - -def 
stream_value(x): - x = resolve1(x) - if not isinstance(x, PDFStream): - if STRICT: - raise PDFTypeError('PDFStream required: %r' % x) - return PDFStream({}, '') - return x - -# ascii85decode(data) -def ascii85decode(data): - n = b = 0 - out = '' - for c in data: - if '!' <= c and c <= 'u': - n += 1 - b = b*85+(ord(c)-33) - if n == 5: - out += struct.pack('>L',b) - n = b = 0 - elif c == 'z': - assert n == 0 - out += '\0\0\0\0' - elif c == '~': - if n: - for _ in range(5-n): - b = b*85+84 - out += struct.pack('>L',b)[:n-1] - break - return out - - -## PDFStream type -class PDFStream(PDFObject): - def __init__(self, dic, rawdata, decipher=None): - length = int_value(dic.get('Length', 0)) - eol = rawdata[length:] - # quick and dirty fix for false length attribute, - # might not work if the pdf stream parser has a problem - if decipher != None and decipher.__name__ == 'decrypt_aes': - if (len(rawdata) % 16) != 0: - cutdiv = len(rawdata) // 16 - rawdata = rawdata[:16*cutdiv] - else: - if eol in ('\r', '\n', '\r\n'): - rawdata = rawdata[:length] - - self.dic = dic - self.rawdata = rawdata - self.decipher = decipher - self.data = None - self.decdata = None - self.objid = None - self.genno = None - return - - def set_objid(self, objid, genno): - self.objid = objid - self.genno = genno - return - - def __repr__(self): - if self.rawdata: - return '' % \ - (self.objid, len(self.rawdata), self.dic) - else: - return '' % \ - (self.objid, len(self.data), self.dic) - - def decode(self): - assert self.data is None and self.rawdata is not None - data = self.rawdata - if self.decipher: - # Handle encryption - data = self.decipher(self.objid, self.genno, data) - if gen_xref_stm: - self.decdata = data # keep decrypted data - if 'Filter' not in self.dic: - self.data = data - self.rawdata = None - ##print self.dict - return - filters = self.dic['Filter'] - if not isinstance(filters, list): - filters = [ filters ] - for f in filters: - if f in LITERALS_FLATE_DECODE: - # will get errors if 
the document is encrypted. - data = zlib.decompress(data) - elif f in LITERALS_LZW_DECODE: - data = ''.join(LZWDecoder(StringIO(data)).run()) - elif f in LITERALS_ASCII85_DECODE: - data = ascii85decode(data) - elif f == LITERAL_CRYPT: - raise PDFNotImplementedError('/Crypt filter is unsupported') - else: - raise PDFNotImplementedError('Unsupported filter: %r' % f) - # apply predictors - if 'DP' in self.dic: - params = self.dic['DP'] - else: - params = self.dic.get('DecodeParms', {}) - if 'Predictor' in params: - pred = int_value(params['Predictor']) - if pred: - if pred != 12: - raise PDFNotImplementedError( - 'Unsupported predictor: %r' % pred) - if 'Columns' not in params: - raise PDFValueError( - 'Columns undefined for predictor=12') - columns = int_value(params['Columns']) - buf = '' - ent0 = '\x00' * columns - for i in xrange(0, len(data), columns+1): - pred = data[i] - ent1 = data[i+1:i+1+columns] - if pred == '\x02': - ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \ - for (a,b) in zip(ent0,ent1)) - buf += ent1 - ent0 = ent1 - data = buf - self.data = data - self.rawdata = None - return - - def get_data(self): - if self.data is None: - self.decode() - return self.data - - def get_rawdata(self): - return self.rawdata - - def get_decdata(self): - if self.decdata is not None: - return self.decdata - data = self.rawdata - if self.decipher and data: - # Handle encryption - data = self.decipher(self.objid, self.genno, data) - return data - - -## PDF Exceptions -## -class PDFSyntaxError(PDFException): pass -class PDFNoValidXRef(PDFSyntaxError): pass -class PDFEncryptionError(PDFException): pass -class PDFPasswordIncorrect(PDFEncryptionError): pass - -# some predefined literals and keywords. 
LITERAL_OBJSTM = PSLiteralTable.intern('ObjStm')
LITERAL_XREF = PSLiteralTable.intern('XRef')
LITERAL_PAGE = PSLiteralTable.intern('Page')
LITERAL_PAGES = PSLiteralTable.intern('Pages')
LITERAL_CATALOG = PSLiteralTable.intern('Catalog')


##  XRefs
##

##  PDFXRef
##
class PDFXRef(object):
    """Cross-reference table read from a plain-text ``xref`` section.

    After load(), ``offsets`` maps an object id to ``(genno, pos)`` for
    every in-use entry and ``trailer`` holds the trailer dictionary.
    """

    def __init__(self):
        # Filled in by load(); None means "nothing loaded yet".
        self.offsets = None
        return

    def __repr__(self):
        # The format string needs a conversion specifier: formatting with
        # an empty string raises TypeError as soon as the object is repr'd
        # (e.g. by a '%r' error message).  ``or ()`` keeps the repr usable
        # before load() has run.
        return '<PDFXRef: objects=%d>' % len(self.offsets or ())

    def objids(self):
        """Return an iterator over the object ids recorded in this table."""
        return self.offsets.iterkeys()

    def load(self, parser):
        """Read the xref subsections from *parser*, then the trailer.

        Lines are consumed with parser.nextline() until one starts with
        'trailer'; the parser is then rewound to the start of that line
        and load_trailer() takes over.

        Raises PDFNoValidXRef on EOF, on an empty line, on a subsection
        header that is not two integers, or on an entry that does not
        have exactly three space-separated fields.
        """
        self.offsets = {}
        while True:
            try:
                (pos, line) = parser.nextline()
            except PSEOF:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
            if not line:
                raise PDFNoValidXRef('Premature eof: %r' % parser)
            if line.startswith('trailer'):
                # Rewind so that load_trailer() sees the keyword itself.
                parser.seek(pos)
                break
            # Subsection header: "<first object id> <number of entries>".
            fields = line.strip().split(' ')
            if len(fields) != 2:
                raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
            try:
                (start, nobjs) = map(int, fields)
            except ValueError:
                raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
            for objid in xrange(start, start + nobjs):
                try:
                    (_, line) = parser.nextline()
                except PSEOF:
                    raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
                # Entry: "<offset> <generation> <n|f>".
                fields = line.strip().split(' ')
                if len(fields) != 3:
                    raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
                (pos, genno, use) = fields
                # Only in-use ('n') entries are recorded.
                if use != 'n':
                    continue
                self.offsets[objid] = (int(genno), int(pos))
        self.load_trailer(parser)
        return

    KEYWORD_TRAILER = PSKeywordTable.intern('trailer')

    def load_trailer(self, parser):
        """Read the ``trailer`` keyword and its dictionary into self.trailer.

        If the parser hits EOF, the dictionary is taken from the single
        item popped off the parser stack instead; PDFNoValidXRef is
        raised when nothing can be popped.
        """
        try:
            (_, kwd) = parser.nexttoken()
            assert kwd is self.KEYWORD_TRAILER
            (_, dic) = parser.nextobject(direct=True)
        except PSEOF:
            x = parser.pop(1)
            if not x:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted')
            (_, dic) = x[0]
        self.trailer = dict_value(dic)
        return

    def getpos(self, objid):
        """Return ``(None, pos)`` for *objid*.

        Raises KeyError when the object id is not in the table.  The
        stored generation number is not part of the result.
        """
        (_, pos) = self.offsets[objid]
        return (None, pos)


##  PDFXRefStream
##
class PDFXRefStream(object):
    """Cross-reference information read from an /XRef stream object.

    ``index`` is a list of ``(first, size)`` subsections, ``data`` the
    decoded stream bytes, ``fl1``/``fl2``/``fl3`` the three field widths
    from /W, and ``entlen`` their sum (the size of one entry).
    """

    def __init__(self):
        self.index = None
        self.data = None
        self.entlen = None
        self.fl1 = self.fl2 = self.fl3 = None
        return

    def __repr__(self):
        # See PDFXRef.__repr__: an empty format string raises TypeError.
        # The 1-tuple keeps '%' from trying to unpack the index itself.
        return '<PDFXRefStream: index=%r>' % (self.index,)

    def objids(self):
        """Yield every object id covered by the /Index subsections."""
        for first, size in self.index:
            for objid in xrange(first, first + size):
                yield objid

    def load(self, parser, debug=0):
        """Read an ``<objid> <genno> obj <stream>`` sequence from *parser*.

        The three leading tokens are read and discarded.  *debug* is
        accepted but not used.  Raises PDFNoValidXRef when the object is
        not a PDFStream whose /Type is /XRef.
        """
        (_, objid) = parser.nexttoken()  # ignored
        (_, genno) = parser.nexttoken()  # ignored
        (_, kwd) = parser.nexttoken()
        (_, stream) = parser.nextobject()
        if not isinstance(stream, PDFStream) or \
               stream.dic['Type'] is not LITERAL_XREF:
            raise PDFNoValidXRef('Invalid PDF stream spec.')
        size = stream.dic['Size']
        # Without /Index the stream holds one subsection: (0, Size).
        index = stream.dic.get('Index', (0, size))
        # Pair the flat [first0, size0, first1, size1, ...] list up.
        self.index = zip(islice(index, 0, None, 2),
                         islice(index, 1, None, 2))
        (self.fl1, self.fl2, self.fl3) = stream.dic['W']
        self.data = stream.get_data()
        self.entlen = self.fl1 + self.fl2 + self.fl3
        self.trailer = stream.dic
        return

    def getpos(self, objid):
        """Locate *objid* through the stream's entries.

        Returns ``(None, pos)`` for a type-1 entry and
        ``(stream_objid, index)`` for a type-2 entry.  Raises KeyError
        when *objid* is outside every subsection or its entry has any
        other type.
        """
        # Count the entries of the subsections preceding the one that
        # contains objid, so that the entry's slot in self.data is known.
        offset = 0
        for first, size in self.index:
            if first <= objid and objid < (first + size):
                break
            offset += size
        else:
            raise KeyError(objid)
        i = self.entlen * ((objid - first) + offset)
        ent = self.data[i:i+self.entlen]
        # NOTE(review): the second argument looks like the default used
        # when the type field has zero width -- confirm against nunpack.
        f1 = nunpack(ent[:self.fl1], 1)
        if f1 == 1:
            pos = nunpack(ent[self.fl1:self.fl1+self.fl2])
            # The third field is decoded but not part of the result.
            genno = nunpack(ent[self.fl1+self.fl2:])
            return (None, pos)
        elif f1 == 2:
            objid = nunpack(ent[self.fl1:self.fl1+self.fl2])
            index = nunpack(ent[self.fl1+self.fl2:])
            return (objid, index)
        # this is a free object
        raise KeyError(objid)


##  PDFDocument
##
##  A PDFDocument object represents a PDF document.
##  Since a PDF file is usually pretty big, normally it is not loaded
##  at once. Rather it is parsed dynamically as processing goes.
##  A PDF parser is associated with the document.
##
class PDFDocument(object):

    def __init__(self):
        """Create an empty document with no parser attached."""
        self.xrefs = []
        self.objs = {}
        self.parsed_objs = {}
        self.root = None
        self.catalog = None
        self.parser = None
        # (document id, /Encrypt dictionary) once set_parser() finds one.
        self.encryption = None
        self.decipher = None
        # dictionaries for fileopen
        self.fileopen = {}
        self.urlresult = {}
        self.ready = False
        return

    # set_parser(parser)
    #   Associates the document with an (already initialized) parser object.
    def set_parser(self, parser):
        """Attach *parser* and read the xref/trailer information from it.

        Does nothing when a parser is already attached.  Walks the xrefs
        returned by parser.read_xref(), remembers the encryption info of
        each trailer that has an /Encrypt entry, and stops at the first
        trailer carrying a /Root entry.

        Raises PDFSyntaxError when no trailer has a /Root entry.
        """
        if self.parser:
            return
        self.parser = parser
        # The document is set to be temporarily ready during collecting
        # all the basic information about the document, e.g.
        # the header, the encryption information, and the access rights
        # for the document.
        self.ready = True
        # Retrieve the information of each header that was appended
        # (maybe multiple times) at the end of the document.
        self.xrefs = parser.read_xref()
        for xref in self.xrefs:
            trailer = xref.trailer
            if not trailer:
                continue

            # If there's an encryption info, remember it.
            if 'Encrypt' in trailer:
                #assert not self.encryption
                try:
                    self.encryption = (list_value(trailer['ID']),
                                       dict_value(trailer['Encrypt']))
                # fix for bad files: a missing or malformed /ID falls back
                # to a dummy id.  Catch Exception rather than everything so
                # that KeyboardInterrupt/SystemExit still propagate.
                # NOTE(review): the dummy id is a plain string, while
                # list_value() presumably yields a list -- confirm that the
                # code consuming the id copes with both.
                except Exception:
                    self.encryption = ('ffffffffffffffffffffffffffffffffffff',
                                       dict_value(trailer['Encrypt']))
            if 'Root' in trailer:
                self.set_root(dict_value(trailer['Root']))
                break
        else:
            raise PDFSyntaxError('No /Root object! - Is this really a PDF?')
        # The document is set to be non-ready again, until all the
        # proper initialization (asking the password key and
        # verifying the access permission, so on) is finished.
        self.ready = False
        return

    # set_root(root)
    #   Set the Root dictionary of the document.
    #   Each PDF file must have exactly one /Root dictionary.
- def set_root(self, root): - self.root = root - self.catalog = dict_value(self.root) - if self.catalog.get('Type') is not LITERAL_CATALOG: - if STRICT: - raise PDFSyntaxError('Catalog not found!') - return - # initialize(password='') - # Perform the initialization with a given password. - # This step is mandatory even if there's no password associated - # with the document. - def initialize(self, password=''): - if not self.encryption: - self.is_printable = self.is_modifiable = self.is_extractable = True - self.ready = True - return - (docid, param) = self.encryption - type = literal_name(param['Filter']) - if type == 'Adobe.APS': - return self.initialize_adobe_ps(password, docid, param) - if type == 'Standard': - return self.initialize_standard(password, docid, param) - if type == 'EBX_HANDLER': - return self.initialize_ebx(password, docid, param) - if type == 'FOPN_fLock': - # remove of unnecessairy password attribute - return self.initialize_fopn_flock(docid, param) - if type == 'FOPN_foweb': - # remove of unnecessairy password attribute - return self.initialize_fopn(docid, param) - raise PDFEncryptionError('Unknown filter: param=%r' % param) - - def initialize_adobe_ps(self, password, docid, param): - global KEYFILEPATH - self.decrypt_key = self.genkey_adobe_ps(param) - self.genkey = self.genkey_v4 - self.decipher = self.decrypt_aes - self.ready = True - return - - def getPrincipalKey(self, k=None, url=None, referer=None): - if url == None: - url="ssl://edc.bibliothek-digital.de/edcws/services/urn:EDCLicenseService" - data1='<wsse:Security '+\ - 'xmlns:wsse="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-secext-'+\ - '1.0.xsd"><wsse:UsernameToken><wsse:Username>edc_anonymous</wsse:Username&'+\ - 'gt;<wsse:Password Type="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-username-'+\ - 'token-profile-1.0#PasswordText">edc_anonymous</wsse:Password></wsse:UsernameToken&'+\ - 'gt;</wsse:Security>7de-de'+\ - '1010<'+\ - 
'watermarkTemplateSeqNum>0' - if k not in url[:40]: - return None - #~ extract host and path: - host=re.compile(r'[a-zA-Z]://([^/]+)/.+', re.I).search(url).group(1) - urlpath=re.compile(r'[a-zA-Z]://[^/]+(/.+)', re.I).search(url).group(1) - - # open a socket connection on port 80 - - conn = httplib.HTTPSConnection(host, 443) - - #~ Headers for request - headers={"Accept": "*/*", "Host": host, "User-Agent": "Mozilla/3.0 (compatible; Acrobat EDC SOAP 1.0)", - "Content-Type": "text/xml; charset=utf-8", "Cache-Control": "no-cache", "SOAPAction": ""} - - # send data1 and headers - try: - conn.request("POST", urlpath, data1, headers) - except: - raise ADEPTError("Could not post request to '"+host+"'.") - - # read respose - try: - response = conn.getresponse() - responsedata=response.read() - except: - raise ADEPTError("Could not read response from '"+host+"'.") - - # close connection - conn.close() - - try: - key=re.compile(r'PricipalKey"((?!).)*]*>(((?!).)*)', re.I).search(responsedata).group(2) - - except : - key=None - return key - - def genkey_adobe_ps(self, param): - # nice little offline principal keys dictionary - principalkeys = { 'bibliothek-digital.de': 'Dzqx8McQUNd2CDzBVmtnweUxVWlqJTMqyYtiDIc4dZI='.decode('base64')} - for k, v in principalkeys.iteritems(): - result = self.getPrincipalKey(k) - #print result - if result != None: - principalkeys[k] = result.decode('base64') - else: - raise ADEPTError("No (Online) PrincipalKey found.") - - self.is_printable = self.is_modifiable = self.is_extractable = True -## print 'keyvalue' -## print len(keyvalue) -## print keyvalue.encode('hex') - length = int_value(param.get('Length', 0)) / 8 - edcdata = str_value(param.get('EDCData')).decode('base64') - pdrllic = str_value(param.get('PDRLLic')).decode('base64') - pdrlpol = str_value(param.get('PDRLPol')).decode('base64') - #print 'ecd rights' - edclist = [] - for pair in edcdata.split('\n'): - edclist.append(pair) -## print edclist -## print 'edcdata decrypted' -## print 
edclist[0].decode('base64').encode('hex') -## print edclist[1].decode('base64').encode('hex') -## print edclist[2].decode('base64').encode('hex') -## print edclist[3].decode('base64').encode('hex') -## print 'offlinekey' -## print len(edclist[9].decode('base64')) -## print pdrllic - # principal key request - for key in principalkeys: - if key in pdrllic: - principalkey = principalkeys[key] - else: - raise ADEPTError('Cannot find principal key for this pdf') -## print 'minorversion' -## print int(edclist[8]) - # fix for minor version -## minorversion = int(edclist[8]) - 100 -## if minorversion < 1: -## minorversion = 1 -## print int(minorversion) - shakey = SHA256.new() - shakey.update(principalkey) -## for i in range(0,minorversion): -## shakey.update(principalkey) - shakey = shakey.digest() -## shakey = SHA256.new(principalkey).digest() - ivector = 16 * chr(0) - #print shakey - plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64')) - if plaintext[-16:] != 16 * chr(16): - raise ADEPTError('Offlinekey cannot be decrypted, aborting (hint: redownload pdf) ...') - pdrlpol = AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol) - if ord(pdrlpol[-1]) < 1 or ord(pdrlpol[-1]) > 16: - raise ADEPTError('Could not decrypt PDRLPol, aborting ...') - else: - cutter = -1 * ord(pdrlpol[-1]) - #print cutter - pdrlpol = pdrlpol[:cutter] - #print plaintext.encode('hex') - #print 'pdrlpol' - #print pdrlpol - return plaintext[:16] - - PASSWORD_PADDING = '(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' 
\ - '\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz' - # experimental aes pw support - def initialize_standard(self, password, docid, param): - # copy from a global variable - V = int_value(param.get('V', 0)) - if (V <=0 or V > 4): - raise PDFEncryptionError('Unknown algorithm: param=%r' % param) - length = int_value(param.get('Length', 40)) # Key length (bits) - O = str_value(param['O']) - R = int_value(param['R']) # Revision - if 5 <= R: - raise PDFEncryptionError('Unknown revision: %r' % R) - U = str_value(param['U']) - P = int_value(param['P']) - try: - EncMetadata = str_value(param['EncryptMetadata']) - except: - EncMetadata = 'True' - self.is_printable = bool(P & 4) - self.is_modifiable = bool(P & 8) - self.is_extractable = bool(P & 16) - self.is_annotationable = bool(P & 32) - self.is_formsenabled = bool(P & 256) - self.is_textextractable = bool(P & 512) - self.is_assemblable = bool(P & 1024) - self.is_formprintable = bool(P & 2048) - # Algorithm 3.2 - password = (password+self.PASSWORD_PADDING)[:32] # 1 - hash = hashlib.md5(password) # 2 - hash.update(O) # 3 - hash.update(struct.pack('= 3: - # Algorithm 3.5 - hash = hashlib.md5(self.PASSWORD_PADDING) # 2 - hash.update(docid[0]) # 3 - x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4 - for i in xrange(1,19+1): - k = ''.join( chr(ord(c) ^ i) for c in key ) - x = ARC4.new(k).decrypt(x) - u1 = x+x # 32bytes total - if R == 2: - is_authenticated = (u1 == U) - else: - is_authenticated = (u1[:16] == U[:16]) - if not is_authenticated: - raise ADEPTError('Password is not correct.') -## raise PDFPasswordIncorrect - self.decrypt_key = key - # genkey method - if V == 1 or V == 2: - self.genkey = self.genkey_v2 - elif V == 3: - self.genkey = self.genkey_v3 - elif V == 4: - self.genkey = self.genkey_v2 - #self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2 - # rc4 - if V != 4: - self.decipher = self.decipher_rc4 # XXX may be AES - # aes - elif V == 4 and Length == 128: - elf.decipher = self.decipher_aes - elif V == 4 and 
Length == 256: - raise PDFNotImplementedError('AES256 encryption is currently unsupported') - self.ready = True - return - - def initialize_ebx(self, password, docid, param): - global KEYFILEPATH - self.is_printable = self.is_modifiable = self.is_extractable = True - # keyfile path is wrong - if KEYFILEPATH == False: - errortext = 'Cannot find adeptkey.der keyfile. Use ineptkey to generate it.' - raise ADEPTError(errortext) - with open(password, 'rb') as f: - keyder = f.read() - # KEYFILEPATH = '' - key = ASN1Parser([ord(x) for x in keyder]) - key = [bytesToNumber(key.getChild(x).value) for x in xrange(1, 4)] - rsa = RSA.construct(key) - length = int_value(param.get('Length', 0)) / 8 - rights = str_value(param.get('ADEPT_LICENSE')).decode('base64') - rights = zlib.decompress(rights, -15) - rights = etree.fromstring(rights) - expr = './/{http://ns.adobe.com/adept}encryptedKey' - bookkey = ''.join(rights.findtext(expr)).decode('base64') - bookkey = rsa.decrypt(bookkey) - if bookkey[0] != '\x02': - raise ADEPTError('error decrypting book session key') - index = bookkey.index('\0') + 1 - bookkey = bookkey[index:] - ebx_V = int_value(param.get('V', 4)) - ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6)) - # added because of the booktype / decryption book session key error - if ebx_V == 3: - V = 3 - elif ebx_V < 4 or ebx_type < 6: - V = ord(bookkey[0]) - bookkey = bookkey[1:] - else: - V = 2 - if length and len(bookkey) != length: - raise ADEPTError('error decrypting book session key') - self.decrypt_key = bookkey - self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2 - self.decipher = self.decrypt_rc4 - self.ready = True - return - - # fileopen support - def initialize_fopn_flock(self, docid, param): - raise ADEPTError('FOPN_fLock not supported, yet ...') - # debug mode processing - global DEBUG_MODE - global IVERSION - if DEBUG_MODE == True: - if os.access('.',os.W_OK) == True: - debugfile = open('ineptpdf-'+IVERSION+'-debug.txt','w') - else: - raise 
ADEPTError('Cannot write debug file, current directory is not writable') - self.is_printable = self.is_modifiable = self.is_extractable = True - # get parameters and add it to the fo dictionary - self.fileopen['V'] = int_value(param.get('V',2)) - # crypt base - (docid, param) = self.encryption - #rights = dict_value(param['Info']) - rights = param['Info'] - #print rights - if DEBUG_MODE == True: debugfile.write(rights + '\n\n') -## for pair in rights.split(';'): -## try: -## key, value = pair.split('=',1) -## self.fileopen[key] = value -## # fix for some misconfigured INFO variables -## except: -## pass -## kattr = { 'SVID': 'ServiceID', 'DUID': 'DocumentID', 'I3ID': 'Ident3ID', \ -## 'I4ID': 'Ident4ID', 'VERS': 'EncrVer', 'PRID': 'USR'} -## for keys in kattr: -## try: -## self.fileopen[kattr[keys]] = self.fileopen[keys] -## del self.fileopen[keys] -## except: -## continue - # differentiate OS types -## sysplatform = sys.platform -## # if ostype is Windows -## if sysplatform=='win32': -## self.osuseragent = 'Windows NT 6.0' -## self.get_macaddress = self.get_win_macaddress -## self.fo_sethwids = self.fo_win_sethwids -## self.BrowserCookie = WinBrowserCookie -## elif sysplatform=='linux2': -## adeptout = 'Linux is not supported, yet.\n' -## raise ADEPTError(adeptout) -## self.osuseragent = 'Linux i686' -## self.get_macaddress = self.get_linux_macaddress -## self.fo_sethwids = self.fo_linux_sethwids -## else: -## adeptout = '' -## adeptout = adeptout + 'Due to various privacy violations from Apple\n' -## adeptout = adeptout + 'Mac OS X support is disabled by default.' 
-## raise ADEPTError(adeptout) -## # add static arguments for http/https request -## self.fo_setattributes() -## # add hardware specific arguments for http/https request -## self.fo_sethwids() -## -## if 'Code' in self.urlresult: -## if self.fileopen['Length'] == len(self.urlresult['Code']): -## self.decrypt_key = self.urlresult['Code'] -## else: -## self.decrypt_key = self.urlresult['Code'].decode('hex') -## else: -## raise ADEPTError('Cannot find decryption key.') - self.decrypt_key = 'stuff' - self.genkey = self.genkey_v2 - self.decipher = self.decrypt_rc4 - self.ready = True - return - - def initialize_fopn(self, docid, param): - # debug mode processing - global DEBUG_MODE - global IVERSION - if DEBUG_MODE == True: - if os.access('.',os.W_OK) == True: - debugfile = open('ineptpdf-'+IVERSION+'-debug.txt','w') - else: - raise ADEPTError('Cannot write debug file, current directory is not writable') - self.is_printable = self.is_modifiable = self.is_extractable = True - # get parameters and add it to the fo dictionary - self.fileopen['Length'] = int_value(param.get('Length', 0)) / 8 - self.fileopen['VEID'] = str_value(param.get('VEID')) - self.fileopen['BUILD'] = str_value(param.get('BUILD')) - self.fileopen['SVID'] = str_value(param.get('SVID')) - self.fileopen['DUID'] = str_value(param.get('DUID')) - self.fileopen['V'] = int_value(param.get('V',2)) - # crypt base - rights = str_value(param.get('INFO')).decode('base64') - rights = self.genkey_fileopeninfo(rights) - if DEBUG_MODE == True: debugfile.write(rights + '\n\n') - for pair in rights.split(';'): - try: - key, value = pair.split('=',1) - self.fileopen[key] = value - # fix for some misconfigured INFO variables - except: - pass - kattr = { 'SVID': 'ServiceID', 'DUID': 'DocumentID', 'I3ID': 'Ident3ID', \ - 'I4ID': 'Ident4ID', 'VERS': 'EncrVer', 'PRID': 'USR'} - for keys in kattr: - # fishing some misconfigured slashs out of it - try: - self.fileopen[kattr[keys]] = urllib.quote(self.fileopen[keys],safe='') - del 
self.fileopen[keys] - except: - continue - # differentiate OS types - sysplatform = sys.platform - # if ostype is Windows - if sysplatform=='win32': - self.osuseragent = 'Windows NT 6.0' - self.get_macaddress = self.get_win_macaddress - self.fo_sethwids = self.fo_win_sethwids - self.BrowserCookie = WinBrowserCookie - elif sysplatform=='linux2': - adeptout = 'Linux is not supported, yet.\n' - raise ADEPTError(adeptout) - self.osuseragent = 'Linux i686' - self.get_macaddress = self.get_linux_macaddress - self.fo_sethwids = self.fo_linux_sethwids - else: - adeptout = '' - adeptout = adeptout + 'Mac OS X is not supported, yet.' - adeptout = adeptout + 'Read the blogs FAQs for more information' - raise ADEPTError(adeptout) - # add static arguments for http/https request - self.fo_setattributes() - # add hardware specific arguments for http/https request - self.fo_sethwids() - #if DEBUG_MODE == True: debugfile.write(self.fileopen) - if 'UURL' in self.fileopen: - buildurl = self.fileopen['UURL'] - else: - buildurl = self.fileopen['PURL'] - # fix for bad DPRM structure - if self.fileopen['DPRM'][0] != r'/': - self.fileopen['DPRM'] = r'/' + self.fileopen['DPRM'] - # genius fix for bad server urls (IMHO) - if '?' in self.fileopen['DPRM']: - buildurl = buildurl + self.fileopen['DPRM'] + '&' - else: - buildurl = buildurl + self.fileopen['DPRM'] + '?' 
- - # debug customization - #self.fileopen['Machine'] = '' - #self.fileopen['Disk'] = '' - - - surl = ( 'Stamp', 'Mode', 'USR', 'ServiceID', 'DocumentID',\ - 'Ident3ID', 'Ident4ID','DocStrFmt', 'OSType', 'OSName', 'OSData', 'Language',\ - 'LngLCID', 'LngRFC1766', 'LngISO4Char', 'Build', 'ProdVer', 'EncrVer',\ - 'Machine', 'Disk', 'Uuid', 'PrevMach', 'PrevDisk',\ - 'FormHFT',\ - 'SelServer', 'AcroVersion', 'AcroProduct', 'AcroReader',\ - 'AcroCanEdit', 'AcroPrefIDib', 'InBrowser', 'CliAppName',\ - 'DocIsLocal', 'DocPathUrl', 'VolName', 'VolType', 'VolSN',\ - 'FSName', 'FowpKbd', 'OSBuild',\ - 'RequestSchema') - - #settings request and special modes - if 'EVER' in self.fileopen and float(self.fileopen['EVER']) < 3.8: - self.fileopen['Mode'] = 'ICx' - - origurl = buildurl - buildurl = buildurl + 'Request=Setting' - for keys in surl: - try: - buildurl = buildurl + '&' + keys + '=' + self.fileopen[keys] - except: - continue - if DEBUG_MODE == True: debugfile.write( 'settings url:\n') - if DEBUG_MODE == True: debugfile.write( buildurl+'\n\n') - # custom user agent identification? 
- if 'AGEN' in self.fileopen: - useragent = self.fileopen['AGEN'] - urllib.URLopener.version = useragent - # attribute doesn't exist - take the default user agent - else: - urllib.URLopener.version = self.osuseragent - # try to open the url - try: - u = urllib.urlopen(buildurl) - u.geturl() - result = u.read() - except: - raise ADEPTError('No internet connection or a blocking firewall!') -## finally: -## u.close() - # getting rid of the line feed - if DEBUG_MODE == True: debugfile.write('Settings'+'\n') - if DEBUG_MODE == True: debugfile.write(result+'\n\n') - #get rid of unnecessary characters - result = result.rstrip('\n') - result = result.rstrip(chr(13)) - result = result.lstrip('\n') - result = result.lstrip(chr(13)) - self.surlresult = {} - for pair in result.split('&'): - try: - key, value = pair.split('=',1) - # fix for bad server response - if key not in self.surlresult: - self.surlresult[key] = value - except: - pass - if 'RequestSchema' in self.surlresult: - self.fileopen['RequestSchema'] = self.surlresult['RequestSchema'] - if 'ServerSessionData' in self.surlresult: - self.fileopen['ServerSessionData'] = self.surlresult['ServerSessionData'] - if 'SetScope' in self.surlresult: - self.fileopen['RequestSchema'] = self.surlresult['SetScope'] - #print self.surlresult - if 'RetVal' in self.surlresult and 'SEMO' not in self.fileopen and(('Reason' in self.surlresult and \ - self.surlresult['Reason'] == 'AskUnp') or ('SetTarget' in self.surlresult and\ - self.surlresult['SetTarget'] == 'UnpDlg')): - # get user and password dialog - try: - self.gen_pw_dialog(self.surlresult['UnpUiName'], self.surlresult['UnpUiPass'],\ - self.surlresult['UnpUiTitle'], self.surlresult['UnpUiOk'],\ - self.surlresult['UnpUiSunk'], self.surlresult['UnpUiComm']) - except: - self.gen_pw_dialog() - - # the fileopen check might not be always right because of strange server responses - if 'SEMO' in self.fileopen and (self.fileopen['SEMO'] == '1'\ - or self.fileopen['SEMO'] == '2') and 
('CSES' in self.fileopen and\ - self.fileopen['CSES'] != 'fileopen'): - # get the url name for the cookie(s) - if 'CURL' in self.fileopen: - self.surl = self.fileopen['CURL'] - if 'CSES' in self.fileopen: - self.cses = self.fileopen['CSES'] - elif 'PHOS' in self.fileopen: - self.surl = self.fileopen['PHOS'] - elif 'LHOS' in self.fileopen: - self.surl = self.fileopen['LHOS'] - else: - raise ADEPTError('unknown Cookie name.\n Check ineptpdf forum for further assistance') - self.pwfieldreq = 1 - # session cookie processing - if self.fileopen['SEMO'] == '1': - cookies = self.BrowserCookie() - #print self.cses - #print self.surl - csession = cookies.getcookie(self.cses,self.surl) - if csession != None: - self.fileopen['Session'] = csession - self.gui = False - # fallback - else: - self.pwtk = Tkinter.Tk() - self.pwtk.title('Ineptpdf8') - self.pwtk.minsize(150, 0) - infotxt1 = 'Get the session cookie key manually (Firefox step-by-step:\n'+\ - 'Start Firefox -> Tools -> Options -> Privacy -> Show Cookies\n'+\ - '-> Search for a cookie from ' + self.surl +' with the\n'+\ - 'name ' + self.cses +' and copy paste the content field in the\n'+\ - 'Session Content field. 
Remove possible spaces or new lines at the '+\ - 'end\n (cursor must be blinking right behind the last character)' - self.label0 = Tkinter.Label(self.pwtk, text=infotxt1) - self.label0.pack() - self.label1 = Tkinter.Label(self.pwtk, text="Session Content") - self.pwfieldreq = 0 - self.gui = True - # user cookie processing - elif self.fileopen['SEMO'] == '2': - cookies = self.BrowserCookie() - #print self.cses - #print self.surl - name = cookies.getcookie('name',self.surl) - passw = cookies.getcookie('pass',self.surl) - if name != None or passw != None: - self.fileopen['UserName'] = urllib.quote(name) - self.fileopen['UserPass'] = urllib.quote(passw) - self.gui = False - # fallback - else: - self.pwtk = Tkinter.Tk() - self.pwtk.title('Ineptpdf8') - self.pwtk.minsize(150, 0) - self.label1 = Tkinter.Label(self.pwtk, text="Username") - infotxt1 = 'Get the user cookie keys manually (Firefox step-by-step:\n'+\ - 'Start Firefox -> Tools -> Options -> Privacy -> Show Cookies\n'+\ - '-> Search for cookies from ' + self.surl +' with the\n'+\ - 'name name in the user field and copy paste the content field in the\n'+\ - 'username field. Do the same with the name pass in the password field).' 
- self.label0 = Tkinter.Label(self.pwtk, text=infotxt1) - self.label0.pack() - self.pwfieldreq = 1 - self.gui = True -## else: -## self.pwtk = Tkinter.Tk() -## self.pwtk.title('Ineptpdf8') -## self.pwtk.minsize(150, 0) -## self.pwfieldreq = 0 -## self.label1 = Tkinter.Label(self.pwtk, text="Username") -## self.pwfieldreq = 1 -## self.gui = True - if self.gui == True: - self.un_entry = Tkinter.Entry(self.pwtk) - # cursor here - self.un_entry.focus() - self.label2 = Tkinter.Label(self.pwtk, text="Password") - self.pw_entry = Tkinter.Entry(self.pwtk, show="*") - self.button = Tkinter.Button(self.pwtk, text='Go for it!', command=self.fo_save_values) - # widget layout, stack vertical - self.label1.pack() - self.un_entry.pack() - # create a password label and field - if self.pwfieldreq == 1: - self.label2.pack() - self.pw_entry.pack() - self.button.pack() - self.pwtk.update() - # start the event loop - self.pwtk.mainloop() - - # original request - # drive through tupple for building the permission url - burl = ( 'Stamp', 'Mode', 'USR', 'ServiceID', 'DocumentID',\ - 'Ident3ID', 'Ident4ID','DocStrFmt', 'OSType', 'Language',\ - 'LngLCID', 'LngRFC1766', 'LngISO4Char', 'Build', 'ProdVer', 'EncrVer',\ - 'Machine', 'Disk', 'Uuid', 'PrevMach', 'PrevDisk', 'User', 'SaUser', 'SaSID',\ - # special security measures - 'HostIsDomain', 'PhysHostname', 'LogiHostname', 'SaRefDomain',\ - 'FormHFT', 'UserName', 'UserPass', 'Session', \ - 'SelServer', 'AcroVersion', 'AcroProduct', 'AcroReader',\ - 'AcroCanEdit', 'AcroPrefIDib', 'InBrowser', 'CliAppName',\ - 'DocIsLocal', 'DocPathUrl', 'VolName', 'VolType', 'VolSN',\ - 'FSName', 'ServerSessionData', 'FowpKbd', 'OSBuild', \ - 'DocumentSessionData', 'RequestSchema') - - buildurl = origurl - buildurl = buildurl + 'Request=DocPerm' - for keys in burl: - try: - buildurl = buildurl + '&' + keys + '=' + self.fileopen[keys] - except: - continue - if DEBUG_MODE == True: debugfile.write('1st url:'+'\n') - if DEBUG_MODE == True: 
debugfile.write(buildurl+'\n\n') - # custom user agent identification? - if 'AGEN' in self.fileopen: - useragent = self.fileopen['AGEN'] - urllib.URLopener.version = useragent - # attribute doesn't exist - take the default user agent - else: - urllib.URLopener.version = self.osuseragent - # try to open the url - try: - u = urllib.urlopen(buildurl) - u.geturl() - result = u.read() - except: - raise ADEPTError('No internet connection or a blocking firewall!') -## finally: -## u.close() - # getting rid of the line feed - if DEBUG_MODE == True: debugfile.write('1st preresult'+'\n') - if DEBUG_MODE == True: debugfile.write(result+'\n\n') - #get rid of unnecessary characters - result = result.rstrip('\n') - result = result.rstrip(chr(13)) - result = result.lstrip('\n') - result = result.lstrip(chr(13)) - self.urlresult = {} - for pair in result.split('&'): - try: - key, value = pair.split('=',1) - self.urlresult[key] = value - except: - pass -## if 'RequestSchema' in self.surlresult: -## self.fileopen['RequestSchema'] = self.urlresult['RequestSchema'] - #self.urlresult - #result[0:8] == 'RetVal=1') or (result[0:8] == 'RetVal=2'): - if ('RetVal' in self.urlresult and (self.urlresult['RetVal'] != '1' and \ - self.urlresult['RetVal'] != '2' and \ - self.urlresult['RetVal'] != 'Update' and \ - self.urlresult['RetVal'] != 'Answer')): - - if ('Reason' in self.urlresult and (self.urlresult['Reason'] == 'BadUserPwd'\ - or self.urlresult['Reason'] == 'AskUnp')) or ('SwitchTo' in self.urlresult\ - and (self.urlresult['SwitchTo'] == 'Dialog')): - if 'ServerSessionData' in self.urlresult: - self.fileopen['ServerSessionData'] = self.urlresult['ServerSessionData'] - if 'DocumentSessionData' in self.urlresult: - self.fileopen['DocumentSessionData'] = self.urlresult['DocumentSessionData'] - buildurl = origurl - buildurl = buildurl + 'Request=DocPerm' - self.gen_pw_dialog() - # password not found - fallback - for keys in burl: - try: - buildurl = buildurl + '&' + keys + '=' + 
self.fileopen[keys] - except: - continue - if DEBUG_MODE == True: debugfile.write( '2ndurl:') - if DEBUG_MODE == True: debugfile.write( buildurl+'\n\n') - # try to open the url - try: - u = urllib.urlopen(buildurl) - u.geturl() - result = u.read() - except: - raise ADEPTError('No internet connection or a blocking firewall!') - # getting rid of the line feed - if DEBUG_MODE == True: debugfile.write( '2nd preresult') - if DEBUG_MODE == True: debugfile.write( result+'\n\n') - #get rid of unnecessary characters - result = result.rstrip('\n') - result = result.rstrip(chr(13)) - result = result.lstrip('\n') - result = result.lstrip(chr(13)) - self.urlresult = {} - for pair in result.split('&'): - try: - key, value = pair.split('=',1) - self.urlresult[key] = value - except: - pass - # did it work? - if ('RetVal' in self.urlresult and (self.urlresult['RetVal'] != '1' and \ - self.urlresult['RetVal'] != '2' and - self.urlresult['RetVal'] != 'Update' and \ - self.urlresult['RetVal'] != 'Answer')): - raise ADEPTError('Decryption was not successfull.\nReason: ' + self.urlresult['Error']) - # fix for non-standard-conform fileopen pdfs -## if self.fileopen['Length'] != 5 and self.fileopen['Length'] != 16: -## if self.fileopen['V'] == 1: -## self.fileopen['Length'] = 5 -## else: -## self.fileopen['Length'] = 16 - # patch for malformed pdfs - #print len(self.urlresult['Code']) - #print self.urlresult['Code'].encode('hex') - if 'code' in self.urlresult: - self.urlresult['Code'] = self.urlresult['code'] - if 'Code' in self.urlresult: - if len(self.urlresult['Code']) == 5 or len(self.urlresult['Code']) == 16: - self.decrypt_key = self.urlresult['Code'] - else: - self.decrypt_key = self.urlresult['Code'].decode('hex') - else: - raise ADEPTError('Cannot find decryption key.') - self.genkey = self.genkey_v2 - self.decipher = self.decrypt_rc4 - self.ready = True - return - - def gen_pw_dialog(self, Username='Username', Password='Password', Title='User/Password Authentication',\ - 
OK='Proceed', Text1='Authorization', Text2='Enter Required Data'): - self.pwtk = Tkinter.Tk() - self.pwtk.title(Title) - self.pwtk.minsize(150, 0) - self.label1 = Tkinter.Label(self.pwtk, text=Text1) - self.label2 = Tkinter.Label(self.pwtk, text=Text2) - self.label3 = Tkinter.Label(self.pwtk, text=Username) - self.pwfieldreq = 1 - self.gui = True - self.un_entry = Tkinter.Entry(self.pwtk) - # cursor here - self.un_entry.focus() - self.label4 = Tkinter.Label(self.pwtk, text=Password) - self.pw_entry = Tkinter.Entry(self.pwtk, show="*") - self.button = Tkinter.Button(self.pwtk, text=OK, command=self.fo_save_values) - # widget layout, stack vertical - self.label1.pack() - self.label2.pack() - self.label3.pack() - self.un_entry.pack() - # create a password label and field - if self.pwfieldreq == 1: - self.label4.pack() - self.pw_entry.pack() - self.button.pack() - self.pwtk.update() - # start the event loop - self.pwtk.mainloop() - - # genkey functions - def genkey_v2(self, objid, genno): - objid = struct.pack(' -1: - mac = line.split()[4] - break - return mac.replace(':','') - except: - raise ADEPTError('Cannot find MAC address. Get forum help.') - - def get_win_macaddress(self): - try: - gasize = c_ulong(5000) - p = create_string_buffer(5000) - GetAdaptersInfo = windll.iphlpapi.GetAdaptersInfo - GetAdaptersInfo(byref(p),byref(gasize)) - return p[0x194:0x19a].encode('hex') - except: - raise ADEPTError('Cannot find MAC address. 
Get forum help.') - - # custom conversion 5 bytes to 8 chars method - def fo_convert5to8(self, edisk): - # byte to number/char mapping table - darray=[0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,\ - 0x46,0x47,0x48,0x4A,0x4B,0x4C,0x4D,0x4E,0x50,0x51,0x52,0x53,0x54,\ - 0x55,0x56,0x57,0x58,0x59,0x5A] - pdid = struct.pack('> 5 - outputhw = outputhw + chr(darray[index]) - pdid = (ord(edisk[4]) << 2)|pdid - # get the last 2 bits from the hwid + low part of the cpuid - for i in range(0,2): - index = pdid & 0x1f - # shift the disk id 5 bits to the right - pdid = pdid >> 5 - outputhw = outputhw + chr(darray[index]) - return outputhw - - # Linux processing - def fo_linux_sethwids(self): - # linux specific attributes - self.fileopen['OSType']='Linux' - self.fileopen['AcroProduct']='AcroReader' - self.fileopen['AcroReader']='Yes' - self.fileopen['AcroVersion']='9.101' - self.fileopen['FSName']='ext3' - self.fileopen['Build']='878' - self.fileopen['ProdVer']='1.8.5.1' - self.fileopen['OSBuild']='2.6.33' - # write hardware keys - hwkey = 0 - pmac = self.get_macaddress().decode("hex"); - self.fileopen['Disk'] = self.fo_convert5to8(pmac[1:]) - # get primary used default mac address - self.fileopen['Machine'] = self.fo_convert5to8(pmac[1:]) - # get uuid - # check for reversed offline handler 6AB83F4Ah + AFh 6AB83F4Ah - if 'LILA' in self.fileopen: - pass - if 'Ident4ID' in self.fileopen: - self.fileopen['User'] = getpass.getuser() - self.fileopen['SaUser'] = getpass.getuser() - try: - cuser = winreg.HKEY_CURRENT_USER - FOW3_UUID = 'Software\\Fileopen' - regkey = winreg.OpenKey(cuser, FOW3_UUID) - userkey = winreg.QueryValueEx(regkey, 'Fowp3Uuid')[0] -# if self.genkey_cryptmach(userkey)[0:4] != 'ec20': - self.fileopen['Uuid'] = self.genkey_cryptmach(userkey)[4:] -## elif self.genkey_cryptmach(userkey)[0:4] != 'ec20': -## self.fileopen['Uuid'] = self.genkey_cryptmach(userkey,1)[4:] -## else: - except: - raise ADEPTError('Cannot find FowP3Uuid file - reason might 
be Adobe (Reader) X.'\ - 'Read the FAQs for more information how to solve the problem.') - else: - self.fileopen['Uuid'] = str(uuid.uuid1()) - # get time stamp - self.fileopen['Stamp'] = str(time.time())[:-3] - # get fileopen input pdf name + path - self.fileopen['DocPathUrl'] = 'file%3a%2f%2f%2f'\ - + urllib.quote(os.path.normpath(INPUTFILEPATH)) - # clear the link - #INPUTFILEPATH = '' -## # get volume name (urllib quote necessairy?) urllib.quote( -## self.fileopen['VolName'] = win32api.GetVolumeInformation("C:\\")[0] -## # get volume serial number -## self.fileopen['VolSN'] = str(win32api.GetVolumeInformation("C:\\")[1]) - return - - # Windows processing - def fo_win_sethwids(self): - # Windows specific attributes - self.fileopen['OSType']='Windows' - self.fileopen['OSName']='Vista' - self.fileopen['OSData']='Service%20Pack%204' - self.fileopen['AcroProduct']='Reader' - self.fileopen['AcroReader']='Yes' - self.fileopen['OSBuild']='7600' - self.fileopen['AcroVersion']='9.1024' - self.fileopen['Build']='879' - # write hardware keys - hwkey = 0 - # get the os type and save it in ostype - try: - import win32api - import win32security - import win32file - except: - raise ADEPTError('PyWin Extension (Win32API module) needed.\n'+\ - 'Download from http://sourceforge.net/projects/pywin32/files/ ') - try: - import winreg - except ImportError: - import _winreg as winreg - try: - v0 = win32api.GetVolumeInformation('C:\\') - v1 = win32api.GetSystemInfo()[6] - # fix for possible negative integer (Python problem) - volserial = v0[1] & 0xffffffff - lowcpu = v1 & 255 - highcpu = (v1 >> 8) & 255 - # changed to int - volserial = struct.pack(' 0 and mode == True: - m.update(key_string[:(13-len(uname))]) - md5sum = m.digest()[0:16] - # print md5sum.encode('hex') - # normal ident4id calculation - retval = [] - for sdata in data: - retval.append(ARC4.new(md5sum).decrypt(sdata)) - for rval in retval: - if rval[:4] == 'ec20': - return rval[4:] - return False - # start normal execution 
- # list for username variants - unamevars = [] - # fill username variants list - unamevars.append(self.user) - unamevars.append(self.user + chr(0)) - unamevars.append(self.user.lower()) - unamevars.append(self.user.lower() + chr(0)) - unamevars.append(self.user.upper()) - unamevars.append(self.user.upper() + chr(0)) - # go through it - for uname in unamevars: - result = genkeysub(uname, True) - if result != False: - return result - result = genkeysub(uname) - if result != False: - return result - # didn't find it, return false - return False -## raise ADEPTError('Unsupported Ident4D Decryption,\n'+\ -## 'report the bug to the ineptpdf script forum') - - KEYWORD_OBJ = PSKeywordTable.intern('obj') - - def getobj(self, objid): - if not self.ready: - raise PDFException('PDFDocument not initialized') - #assert self.xrefs - if objid in self.objs: - genno = 0 - obj = self.objs[objid] - else: - for xref in self.xrefs: - try: - (stmid, index) = xref.getpos(objid) - break - except KeyError: - pass - else: - #if STRICT: - # raise PDFSyntaxError('Cannot locate objid=%r' % objid) - return None - if stmid: - if gen_xref_stm: - return PDFObjStmRef(objid, stmid, index) -# Stuff from pdfminer: extract objects from object stream - stream = stream_value(self.getobj(stmid)) - if stream.dic.get('Type') is not LITERAL_OBJSTM: - if STRICT: - raise PDFSyntaxError('Not a stream object: %r' % stream) - try: - n = stream.dic['N'] - except KeyError: - if STRICT: - raise PDFSyntaxError('N is not defined: %r' % stream) - n = 0 - - if stmid in self.parsed_objs: - objs = self.parsed_objs[stmid] - else: - parser = PDFObjStrmParser(stream.get_data(), self) - objs = [] - try: - while 1: - (_,obj) = parser.nextobject() - objs.append(obj) - except PSEOF: - pass - self.parsed_objs[stmid] = objs - genno = 0 - i = n*2+index - try: - obj = objs[i] - except IndexError: - raise PDFSyntaxError('Invalid object number: objid=%r' % (objid)) - if isinstance(obj, PDFStream): - obj.set_objid(objid, 0) -### - 
else: - self.parser.seek(index) - (_,objid1) = self.parser.nexttoken() # objid - (_,genno) = self.parser.nexttoken() # genno - #assert objid1 == objid, (objid, objid1) - (_,kwd) = self.parser.nexttoken() - # #### hack around malformed pdf files - # assert objid1 == objid, (objid, objid1) -## if objid1 != objid: -## x = [] -## while kwd is not self.KEYWORD_OBJ: -## (_,kwd) = self.parser.nexttoken() -## x.append(kwd) -## if x: -## objid1 = x[-2] -## genno = x[-1] -## - if kwd is not self.KEYWORD_OBJ: - raise PDFSyntaxError( - 'Invalid object spec: offset=%r' % index) - (_,obj) = self.parser.nextobject() - if isinstance(obj, PDFStream): - obj.set_objid(objid, genno) - if self.decipher: - obj = decipher_all(self.decipher, objid, genno, obj) - self.objs[objid] = obj - return obj - -# helper class for cookie retrival -class WinBrowserCookie(): - def __init__(self): - pass - def getcookie(self, cname, chost): - # check firefox db - fprofile = os.environ['AppData']+r'\Mozilla\Firefox' - pinifile = 'profiles.ini' - fini = os.path.normpath(fprofile + '\\' + pinifile) - try: - with open(fini,'r') as ffini: - firefoxini = ffini.read() - # Firefox not installed or on an USB stick - except: - return None - for pair in firefoxini.split('\n'): - try: - key, value = pair.split('=',1) - if key == 'Path': - fprofile = os.path.normpath(fprofile+'//'+value+'//'+'cookies.sqlite') - break - # asdf - except: - continue - if os.path.isfile(fprofile): - try: - con = sqlite3.connect(fprofile,1) - except: - raise ADEPTError('Firefox Cookie data base locked. Close Firefox and try again') - cur = con.cursor() - try: - cur.execute("select value from moz_cookies where name=? and host=?", (cname, chost)) - except Exception: - raise ADEPTError('Firefox Cookie database is locked. Close Firefox and try again') - try: - return cur.fetchone()[0] - except Exception: - # sometimes is a dot in front of the host - chost = '.'+chost - cur.execute("select value from moz_cookies where name=? 
and host=?", (cname, chost)) - try: - return cur.fetchone()[0] - except: - return None - -class PDFObjStmRef(object): - maxindex = 0 - def __init__(self, objid, stmid, index): - self.objid = objid - self.stmid = stmid - self.index = index - if index > PDFObjStmRef.maxindex: - PDFObjStmRef.maxindex = index - - -## PDFParser -## -class PDFParser(PSStackParser): - - def __init__(self, doc, fp): - PSStackParser.__init__(self, fp) - self.doc = doc - self.doc.set_parser(self) - return - - def __repr__(self): - return '' - - KEYWORD_R = PSKeywordTable.intern('R') - KEYWORD_ENDOBJ = PSKeywordTable.intern('endobj') - KEYWORD_STREAM = PSKeywordTable.intern('stream') - KEYWORD_XREF = PSKeywordTable.intern('xref') - KEYWORD_STARTXREF = PSKeywordTable.intern('startxref') - def do_keyword(self, pos, token): - if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): - self.add_results(*self.pop(1)) - return - if token is self.KEYWORD_ENDOBJ: - self.add_results(*self.pop(4)) - return - - if token is self.KEYWORD_R: - # reference to indirect object - try: - ((_,objid), (_,genno)) = self.pop(2) - (objid, genno) = (int(objid), int(genno)) - obj = PDFObjRef(self.doc, objid, genno) - self.push((pos, obj)) - except PSSyntaxError: - pass - return - - if token is self.KEYWORD_STREAM: - # stream object - ((_,dic),) = self.pop(1) - dic = dict_value(dic) - try: - objlen = int_value(dic['Length']) - except KeyError: - if STRICT: - raise PDFSyntaxError('/Length is undefined: %r' % dic) - objlen = 0 - self.seek(pos) - try: - (_, line) = self.nextline() # 'stream' - except PSEOF: - if STRICT: - raise PDFSyntaxError('Unexpected EOF') - return - pos += len(line) - self.fp.seek(pos) - data = self.fp.read(objlen) - self.seek(pos+objlen) - while 1: - try: - (linepos, line) = self.nextline() - except PSEOF: - if STRICT: - raise PDFSyntaxError('Unexpected EOF') - break - if 'endstream' in line: - i = line.index('endstream') - objlen += i - data += line[:i] - break - objlen += len(line) - data += line - 
self.seek(pos+objlen) - obj = PDFStream(dic, data, self.doc.decipher) - self.push((pos, obj)) - return - - # others - self.push((pos, token)) - return - - def find_xref(self): - # search the last xref table by scanning the file backwards. - prev = None - for line in self.revreadlines(): - line = line.strip() - if line == 'startxref': break - if line: - prev = line - else: - raise PDFNoValidXRef('Unexpected EOF') - return int(prev) - - # read xref table - def read_xref_from(self, start, xrefs): - self.seek(start) - self.reset() - try: - (pos, token) = self.nexttoken() - except PSEOF: - raise PDFNoValidXRef('Unexpected EOF') - if isinstance(token, int): - # XRefStream: PDF-1.5 - if GEN_XREF_STM == 1: - global gen_xref_stm - gen_xref_stm = True - self.seek(pos) - self.reset() - xref = PDFXRefStream() - xref.load(self) - else: - if token is not self.KEYWORD_XREF: - raise PDFNoValidXRef('xref not found: pos=%d, token=%r' % - (pos, token)) - self.nextline() - xref = PDFXRef() - xref.load(self) - xrefs.append(xref) - trailer = xref.trailer - if 'XRefStm' in trailer: - pos = int_value(trailer['XRefStm']) - self.read_xref_from(pos, xrefs) - if 'Prev' in trailer: - # find previous xref - pos = int_value(trailer['Prev']) - self.read_xref_from(pos, xrefs) - return - - # read xref tables and trailers - def read_xref(self): - xrefs = [] - trailerpos = None - try: - pos = self.find_xref() - self.read_xref_from(pos, xrefs) - except PDFNoValidXRef: - # fallback - self.seek(0) - pat = re.compile(r'^(\d+)\s+(\d+)\s+obj\b') - offsets = {} - xref = PDFXRef() - while 1: - try: - (pos, line) = self.nextline() - except PSEOF: - break - if line.startswith('trailer'): - trailerpos = pos # remember last trailer - m = pat.match(line) - if not m: continue - (objid, genno) = m.groups() - offsets[int(objid)] = (0, pos) - if not offsets: raise - xref.offsets = offsets - if trailerpos: - self.seek(trailerpos) - xref.load_trailer(self) - xrefs.append(xref) - return xrefs - -## PDFObjStrmParser -## 
-class PDFObjStrmParser(PDFParser): - - def __init__(self, data, doc): - PSStackParser.__init__(self, StringIO(data)) - self.doc = doc - return - - def flush(self): - self.add_results(*self.popall()) - return - - KEYWORD_R = KWD('R') - def do_keyword(self, pos, token): - if token is self.KEYWORD_R: - # reference to indirect object - try: - ((_,objid), (_,genno)) = self.pop(2) - (objid, genno) = (int(objid), int(genno)) - obj = PDFObjRef(self.doc, objid, genno) - self.push((pos, obj)) - except PSSyntaxError: - pass - return - # others - self.push((pos, token)) - return - -### -### My own code, for which there is none else to blame - -class PDFSerializer(object): - def __init__(self, inf, keypath): - global GEN_XREF_STM, gen_xref_stm - gen_xref_stm = GEN_XREF_STM > 1 - self.version = inf.read(8) - inf.seek(0) - self.doc = doc = PDFDocument() - parser = PDFParser(doc, inf) - doc.initialize(keypath) - self.objids = objids = set() - for xref in reversed(doc.xrefs): - trailer = xref.trailer - for objid in xref.objids(): - objids.add(objid) - trailer = dict(trailer) - trailer.pop('Prev', None) - trailer.pop('XRefStm', None) - if 'Encrypt' in trailer: - objids.remove(trailer.pop('Encrypt').objid) - self.trailer = trailer - - def dump(self, outf): - self.outf = outf - self.write(self.version) - self.write('\n%\xe2\xe3\xcf\xd3\n') - doc = self.doc - objids = self.objids - xrefs = {} - maxobj = max(objids) - trailer = dict(self.trailer) - trailer['Size'] = maxobj + 1 - for objid in objids: - obj = doc.getobj(objid) - if isinstance(obj, PDFObjStmRef): - xrefs[objid] = obj - continue - if obj is not None: - try: - genno = obj.genno - except AttributeError: - genno = 0 - xrefs[objid] = (self.tell(), genno) - self.serialize_indirect(objid, obj) - startxref = self.tell() - - if not gen_xref_stm: - self.write('xref\n') - self.write('0 %d\n' % (maxobj + 1,)) - for objid in xrange(0, maxobj + 1): - if objid in xrefs: - # force the genno to be 0 - self.write("%010d 00000 n \n" % 
xrefs[objid][0]) - else: - self.write("%010d %05d f \n" % (0, 65535)) - - self.write('trailer\n') - self.serialize_object(trailer) - self.write('\nstartxref\n%d\n%%%%EOF' % startxref) - - else: # Generate crossref stream. - - # Calculate size of entries - maxoffset = max(startxref, maxobj) - maxindex = PDFObjStmRef.maxindex - fl2 = 2 - power = 65536 - while maxoffset >= power: - fl2 += 1 - power *= 256 - fl3 = 1 - power = 256 - while maxindex >= power: - fl3 += 1 - power *= 256 - - index = [] - first = None - prev = None - data = [] - # Put the xrefstream's reference in itself - startxref = self.tell() - maxobj += 1 - xrefs[maxobj] = (startxref, 0) - for objid in sorted(xrefs): - if first is None: - first = objid - elif objid != prev + 1: - index.extend((first, prev - first + 1)) - first = objid - prev = objid - objref = xrefs[objid] - if isinstance(objref, PDFObjStmRef): - f1 = 2 - f2 = objref.stmid - f3 = objref.index - else: - f1 = 1 - f2 = objref[0] - # we force all generation numbers to be 0 - # f3 = objref[1] - f3 = 0 - - data.append(struct.pack('>B', f1)) - data.append(struct.pack('>L', f2)[-fl2:]) - data.append(struct.pack('>L', f3)[-fl3:]) - index.extend((first, prev - first + 1)) - data = zlib.compress(''.join(data)) - dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index, - 'W': [1, fl2, fl3], 'Length': len(data), - 'Filter': LITERALS_FLATE_DECODE[0], - 'Root': trailer['Root'],} - if 'Info' in trailer: - dic['Info'] = trailer['Info'] - xrefstm = PDFStream(dic, data) - self.serialize_indirect(maxobj, xrefstm) - self.write('startxref\n%d\n%%%%EOF' % startxref) - def write(self, data): - self.outf.write(data) - self.last = data[-1:] - - def tell(self): - return self.outf.tell() - - def escape_string(self, string): - string = string.replace('\\', '\\\\') - string = string.replace('\n', r'\n') - string = string.replace('(', r'\(') - string = string.replace(')', r'\)') - # get rid of ciando id - regularexp = 
re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}') - if regularexp.match(string): return ('http://www.ciando.com') - return string - - def serialize_object(self, obj): - if isinstance(obj, dict): - # Correct malformed Mac OS resource forks for Stanza - if 'ResFork' in obj and 'Type' in obj and 'Subtype' not in obj \ - and isinstance(obj['Type'], int): - obj['Subtype'] = obj['Type'] - del obj['Type'] - # end - hope this doesn't have bad effects - self.write('<<') - for key, val in obj.items(): - self.write('/%s' % key) - self.serialize_object(val) - self.write('>>') - elif isinstance(obj, list): - self.write('[') - for val in obj: - self.serialize_object(val) - self.write(']') - elif isinstance(obj, str): - self.write('(%s)' % self.escape_string(obj)) - elif isinstance(obj, bool): - if self.last.isalnum(): - self.write(' ') - self.write(str(obj).lower()) - elif isinstance(obj, (int, long, float)): - if self.last.isalnum(): - self.write(' ') - self.write(str(obj)) - elif isinstance(obj, PDFObjRef): - if self.last.isalnum(): - self.write(' ') - self.write('%d %d R' % (obj.objid, 0)) - elif isinstance(obj, PDFStream): - ### If we don't generate cross ref streams the object streams - ### are no longer useful, as we have extracted all objects from - ### them. Therefore leave them out from the output. 
- if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm: - self.write('(deleted)') - else: - data = obj.get_decdata() - self.serialize_object(obj.dic) - self.write('stream\n') - self.write(data) - self.write('\nendstream') - else: - data = str(obj) - if data[0].isalnum() and self.last.isalnum(): - self.write(' ') - self.write(data) - - def serialize_indirect(self, objid, obj): - self.write('%d 0 obj' % (objid,)) - self.serialize_object(obj) - if self.last.isalnum(): - self.write('\n') - self.write('endobj\n') - -def cli_main(argv=sys.argv): - progname = os.path.basename(argv[0]) - if RSA is None: - print "%s: This script requires PyCrypto, which must be installed " \ - "separately. Read the top-of-script comment for details." % \ - (progname,) - return 1 - if len(argv) != 4: - print "usage: %s KEYFILE INBOOK OUTBOOK" % (progname,) - return 1 - keypath, inpath, outpath = argv[1:] - with open(inpath, 'rb') as inf: - serializer = PDFSerializer(inf, keypath) - # hope this will fix the 'bad file descriptor' problem - with open(outpath, 'wb') as outf: - # help construct to make sure the method runs to the end - serializer.dump(outf) - return 0 - - -class DecryptionDialog(Tkinter.Frame): - def __init__(self, root): - # debug mode debugging - global DEBUG_MODE - Tkinter.Frame.__init__(self, root, border=5) - ltext='Select file for decryption\n(Ignore Password / Key file option for Fileopen/APS PDFs)' - self.status = Tkinter.Label(self, text=ltext) - self.status.pack(fill=Tkconstants.X, expand=1) - body = Tkinter.Frame(self) - body.pack(fill=Tkconstants.X, expand=1) - sticky = Tkconstants.E + Tkconstants.W - body.grid_columnconfigure(1, weight=2) - Tkinter.Label(body, text='Password\nor Key file').grid(row=0) - self.keypath = Tkinter.Entry(body, width=30) - self.keypath.grid(row=0, column=1, sticky=sticky) - if os.path.exists('adeptkey.der'): - self.keypath.insert(0, 'adeptkey.der') - button = Tkinter.Button(body, text="...", command=self.get_keypath) - 
button.grid(row=0, column=2) - Tkinter.Label(body, text='Input file').grid(row=1) - self.inpath = Tkinter.Entry(body, width=30) - self.inpath.grid(row=1, column=1, sticky=sticky) - button = Tkinter.Button(body, text="...", command=self.get_inpath) - button.grid(row=1, column=2) - Tkinter.Label(body, text='Output file').grid(row=2) - self.outpath = Tkinter.Entry(body, width=30) - self.outpath.grid(row=2, column=1, sticky=sticky) - debugmode = Tkinter.Checkbutton(self, text = "Debug Mode (writable directory required)", command=self.debug_toggle, height=2, \ - width = 40) - debugmode.pack() - button = Tkinter.Button(body, text="...", command=self.get_outpath) - button.grid(row=2, column=2) - buttons = Tkinter.Frame(self) - buttons.pack() - - - botton = Tkinter.Button( - buttons, text="Decrypt", width=10, command=self.decrypt) - botton.pack(side=Tkconstants.LEFT) - Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT) - button = Tkinter.Button( - buttons, text="Quit", width=10, command=self.quit) - button.pack(side=Tkconstants.RIGHT) - - - def get_keypath(self): - keypath = tkFileDialog.askopenfilename( - parent=None, title='Select ADEPT key file', - defaultextension='.der', filetypes=[('DER-encoded files', '.der'), - ('All Files', '.*')]) - if keypath: - keypath = os.path.normpath(os.path.realpath(keypath)) - self.keypath.delete(0, Tkconstants.END) - self.keypath.insert(0, keypath) - return - - def get_inpath(self): - inpath = tkFileDialog.askopenfilename( - parent=None, title='Select ADEPT or FileOpen-encrypted PDF file to decrypt', - defaultextension='.pdf', filetypes=[('PDF files', '.pdf'), - ('All files', '.*')]) - if inpath: - inpath = os.path.normpath(os.path.realpath(inpath)) - self.inpath.delete(0, Tkconstants.END) - self.inpath.insert(0, inpath) - return - - def debug_toggle(self): - global DEBUG_MODE - if DEBUG_MODE == False: - DEBUG_MODE = True - else: - DEBUG_MODE = False - - def get_outpath(self): - outpath = tkFileDialog.asksaveasfilename( - 
parent=None, title='Select unencrypted PDF file to produce', - defaultextension='.pdf', filetypes=[('PDF files', '.pdf'), - ('All files', '.*')]) - if outpath: - outpath = os.path.normpath(os.path.realpath(outpath)) - self.outpath.delete(0, Tkconstants.END) - self.outpath.insert(0, outpath) - return - - def decrypt(self): - global INPUTFILEPATH - global KEYFILEPATH - global PASSWORD - keypath = self.keypath.get() - inpath = self.inpath.get() - outpath = self.outpath.get() - if not keypath or not os.path.exists(keypath): - # keyfile doesn't exist - KEYFILEPATH = False - PASSWORD = keypath - if not inpath or not os.path.exists(inpath): - self.status['text'] = 'Specified input file does not exist' - return - if not outpath: - self.status['text'] = 'Output file not specified' - return - if inpath == outpath: - self.status['text'] = 'Must have different input and output files' - return - # patch for non-ascii characters - INPUTFILEPATH = inpath.encode('utf-8') - argv = [sys.argv[0], keypath, inpath, outpath] - self.status['text'] = 'Processing ...' - try: - cli_main(argv) - except Exception, a: - self.status['text'] = 'Error: ' + str(a) - return - self.status['text'] = 'File successfully decrypted.\n'+\ - 'Close this window or decrypt another pdf file.' - return - -def gui_main(): - root = Tkinter.Tk() - if RSA is None: - root.withdraw() - tkMessageBox.showerror( - "INEPT PDF and FileOpen Decrypter", - "This script requires PyCrypto, which must be installed " - "separately. 
Read the top-of-script comment for details.") - return 1 - root.title('INEPT PDF Decrypter 8.4.51 (FileOpen/APS-Support)') - root.resizable(True, False) - root.minsize(370, 0) - DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1) - root.mainloop() - return 0 - - -if __name__ == '__main__': - if len(sys.argv) > 1: - sys.exit(cli_main()) - sys.exit(gui_main()) diff --git a/Other_Tools/Tetrachroma_FileOpen_ineptpdf/ineptpdf_fileopen.pyw b/Other_Tools/Tetrachroma_FileOpen_ineptpdf/ineptpdf_fileopen.pyw new file mode 100644 index 0000000..5e647e1 --- /dev/null +++ b/Other_Tools/Tetrachroma_FileOpen_ineptpdf/ineptpdf_fileopen.pyw @@ -0,0 +1,3067 @@ +#! /usr/bin/python + +# ineptpdf + +# To run this program install Python 2.7 from http://www.python.org/download/ +# +# PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto +# +# and PyWin Extension (Win32API module) from +# http://sourceforge.net/projects/pywin32/files/ +# +# Make sure to install the dedicated versions for Python 2.7. +# +# It's recommended to use the 32-Bit Python Windows versions (even with a 64-bit +# Windows system). +# +# Save this script file as +# ineptpdf8.4.51.pyw and double-click on it to run it. + +# Revision history: +# 1 - Initial release +# 2 - Improved determination of key-generation algorithm +# 3 - Correctly handle PDF >=1.5 cross-reference streams +# 4 - Removal of ciando's personal ID (anon) +# 5 - removing small bug with V3 ebooks (anon) +# 6 - changed to adeptkey4.der format for 1.7.2 support (anon) +# 6.1 - backward compatibility for 1.7.1 and old adeptkey.der (anon) +# 7 - Get cross reference streams and object streams working for input. +# Not yet supported on output but this only effects file size, +# not functionality. 
(anon2) +# 7.1 - Correct a problem when an old trailer is not followed by startxref (anon2) +# 7.2 - Correct malformed Mac OS resource forks for Stanza +# - Support for cross ref streams on output (decreases file size) (anon2) +# 7.3 - Correct bug in trailer with cross ref stream that caused the error (anon2) +# "The root object is missing or invalid" in Adobe Reader. +# 7.4 - Force all generation numbers in output file to be 0, like in v6. +# Fallback code for wrong xref improved (search till last trailer +# instead of first) (anon2) +# 8 - fileopen user machine identifier support (Tetrachroma) +# 8.1 - fileopen user cookies support (Tetrachroma) +# 8.2 - fileopen user name/password support (Tetrachroma) +# 8.3 - fileopen session cookie support (Tetrachroma) +# 8.3.1 - fix for the "specified key file does not exist" error (Tetrachroma) +# 8.3.2 - improved server result parsing (Tetrachroma) +# 8.4 - Ident4D and encrypted Uuid support (Tetrachroma) +# 8.4.1 - improved MAC address processing (Tetrachroma) +# 8.4.2 - FowP3Uuid fallback file processing (Tetrachroma) +# 8.4.3 - improved user/password pdf file detection (Tetrachroma) +# 8.4.4 - small bugfix (Tetrachroma) +# 8.4.5 - improved cookie host searching (Tetrachroma) +# 8.4.6 - STRICT parsing disabled (non-standard pdf processing) (Tetrachroma) +# 8.4.7 - UTF-8 input file conversion (Tetrachroma) +# 8.4.8 - fix for more rare utf8 problems (Tetrachroma) +# 8.4.9 - solution for utf8 in combination with +# ident4id method (Tetrachroma) +# 8.4.10 - line feed processing, non c system drive patch, nrbook support (Tetrachroma) +# 8.4.11 - alternative ident4id calculation (Tetrachroma) +# 8.4.12 - fix for capital username characters and +# other unusual user login names (Tetrachroma & ZeroPoint) +# 8.4.13 - small bug fixes (Tetrachroma) +# 8.4.14 - fix for non-standard-conform fileopen pdfs (Tetrachroma) +# 8.4.15 - 'bad file descriptor'-fix (Tetrachroma) +# 8.4.16 - improves user/pass detection (Tetrachroma) +# 8.4.17 
- fix for several '=' chars in a DPRM entity (Tetrachroma) +# 8.4.18 - follow up bug fix for the DPRM problem, +# more readable error messages (Tetrachroma) +# 8.4.19 - 2nd fix for 'bad file descriptor' problem (Tetrachroma) +# 8.4.20 - follow up patch (Tetrachroma) +# 8.4.21 - 3rd patch for 'bad file descriptor' (Tetrachroma) +# 8.4.22 - disable prints for exception prevention (Tetrachroma) +# 8.4.23 - check for additional security attributes (Tetrachroma) +# 8.4.24 - improved cookie session support (Tetrachroma) +# 8.4.25 - more compatibility with unicode files (Tetrachroma) +# 8.4.26 - automated session/user cookie request function (works +# only with Firefox 3.x+) (Tetrachroma) +# 8.4.27 - user/password fallback +# 8.4.28 - AES decryption, improved misconfigured pdf handling, +# limited experimental APS support (Tetrachroma & Neisklar) +# 8.4.29 - backport for bad formatted rc4 encrypted pdfs (Tetrachroma) +# 8.4.30 - extended authorization attributes support (Tetrachroma) +# 8.4.31 - improved session cookie and better server response error +# handling (Tetrachroma) +# 8.4.33 - small cookie optimizations (Tetrachroma) +# 8.4.33 - debug output option (Tetrachroma) +# 8.4.34 - better user/password management +# (handles the 'AskUnp' response) (Tetrachroma) +# 8.4.35 - special handling for non-standard systems (Tetrachroma) +# 8.4.36 - previous machine/disk handling [PrevMach/PrevDisk] (Tetrachroma) +# 8.4.36 - FOPN_flock support (Tetrachroma) +# 8.4.37 - patch for unicode paths/filenames (Tetrachroma) +# 8.4.38 - small fix for user/password dialog (Tetrachroma) +# 8.4.39 - sophisticated request mode differentiation, forced +# uuid calculation (Tetrachroma) +# 8.4.40 - fix for non standard server responses (Tetrachroma) +# 8.4.41 - improved user/password request windows, +# better server response tolerance (Tetrachroma) +# 8.4.42 - improved nl/cr server response parsing (Tetrachroma) +# 8.4.43 - fix for user names longer than 13 characters and special +# uuid 
encryption (Tetrachroma) +# 8.4.44 - another fix for ident4d problem (Tetrachroma) +# 8.4.45 - 2nd fix for ident4d problem (Tetrachroma) +# 8.4.46 - script cleanup and optimizations (Tetrachroma) +# 8.4.47 - script identification change to Adobe Reader (Tetrachroma) +# 8.4.48 - improved tolerance for false file/registry entries (Tetrachroma) +# 8.4.49 - improved username encryption (Tetrachroma) +# 8.4.50 - improved (experimental) APS support (Tetrachroma & Neisklar) +# 8.4.51 - automatic APS offline key retrieval (works only for +# Onleihe right now) (80ka80 & Tetrachroma) + +# 8.5.0 - First update by noDRM - trying to update the script to include +# improvements from ineptpdf. + +""" +Decrypts Adobe ADEPT-encrypted and Fileopen PDF files. +""" + +from __future__ import with_statement + +__license__ = 'GPL v3' + +import sys +import os +import re +import zlib +import struct +import hashlib +from itertools import chain, islice +import xml.etree.ElementTree as etree +import Tkinter +import Tkconstants +import tkFileDialog +import tkMessageBox +# added for fileopen support +import urllib +import urlparse +import time +import socket +import string +import uuid +import subprocess +import time +import getpass +from ctypes import * +import traceback +import inspect +import tempfile +import sqlite3 +import httplib + +from decimal import Decimal +import itertools + +try: + from Crypto.Cipher import ARC4 + # needed for newer pdfs + from Crypto.Cipher import AES + from Crypto.Hash import SHA256 + from Crypto.PublicKey import RSA + +except ImportError: + ARC4 = None + RSA = None + +from io import BytesIO + +class ADEPTError(Exception): + pass + +# global variable (needed for fileopen and password decryption) +INPUTFILEPATH = '' +KEYFILEPATH = '' +PASSWORD = '' +DEBUG_MODE = False +IVERSION = '8.4.51' + +# Do we generate cross reference streams on output? 
# 0 = never
# 1 = only if present in input
# 2 = always

GEN_XREF_STM = 1

# This is the value for the current document
gen_xref_stm = False # will be set in PDFSerializer

###
### ASN.1 parsing code from tlslite

def bytesToNumber(bytes):
    '''
    Interpret a byte sequence as an unsigned big-endian integer.
    Accepts anything bytearray() accepts (bytes, bytearray, list of ints).
    '''
    # Plain 0 instead of the Python-2-only literal 0L, which is a
    # SyntaxError on Python 3 (ints are arbitrary precision on both).
    total = 0
    for byte in bytearray(bytes):
        total = (total << 8) + byte
    return total

class ASN1Error(Exception):
    pass

class ASN1Parser(object):
    '''
    Minimal DER reader: splits one TLV element into its length and value
    and allows stepping into the children of a constructed value.
    '''
    class Parser(object):
        '''Cursor over a byte buffer with big-endian integer helpers.'''
        def __init__(self, bytes):
            self.bytes = bytes
            self.index = 0

        def get(self, length):
            '''Read `length` bytes as one big-endian unsigned integer.'''
            if self.index + length > len(self.bytes):
                raise ASN1Error("Error decoding ASN.1")
            x = 0
            for count in range(length):
                x <<= 8
                x |= self.bytes[self.index]
                self.index += 1
            return x

        def getFixBytes(self, lengthBytes):
            '''Return the next `lengthBytes` bytes and advance past them.'''
            bytes = self.bytes[self.index : self.index+lengthBytes]
            self.index += lengthBytes
            return bytes

        def getVarBytes(self, lengthLength):
            '''Read a length prefix of `lengthLength` bytes, then that many bytes.'''
            lengthBytes = self.get(lengthLength)
            return self.getFixBytes(lengthBytes)

        def getFixList(self, length, lengthList):
            '''Read `lengthList` integers of `length` bytes each.'''
            l = [0] * lengthList
            for x in range(lengthList):
                l[x] = self.get(length)
            return l

        def getVarList(self, length, lengthLength):
            '''Read a byte-count prefix, then that many bytes as integers of `length` bytes.'''
            lengthList = self.get(lengthLength)
            if lengthList % length != 0:
                raise ASN1Error("Error decoding ASN.1")
            lengthList = lengthList // length
            l = [0] * lengthList
            for x in range(lengthList):
                l[x] = self.get(length)
            return l

        def startLengthCheck(self, lengthLength):
            self.lengthCheck = self.get(lengthLength)
            self.indexCheck = self.index

        def setLengthCheck(self, length):
            self.lengthCheck = length
            self.indexCheck = self.index

        def stopLengthCheck(self):
            if (self.index - self.indexCheck) != self.lengthCheck:
                raise ASN1Error("Error decoding ASN.1")

        def atLengthCheck(self):
            '''True once exactly lengthCheck bytes were consumed; raises on overrun.'''
            if (self.index - self.indexCheck) < self.lengthCheck:
                return False
            elif (self.index - self.indexCheck) == self.lengthCheck:
                return True
            else:
                raise ASN1Error("Error decoding ASN.1")

    def __init__(self, bytes):
        p = self.Parser(bytes)
        p.get(1)    # skip the type octet
        self.length = self._getASN1Length(p)
        self.value = p.getFixBytes(self.length)

    def getChild(self, which):
        '''Return the `which`-th (0-based) child element as an ASN1Parser.'''
        p = self.Parser(self.value)
        for x in range(which+1):
            markIndex = p.index
            p.get(1)
            length = self._getASN1Length(p)
            p.getFixBytes(length)
        return ASN1Parser(p.bytes[markIndex:p.index])

    def _getASN1Length(self, p):
        '''Decode a short-form (<= 127) or long-form length field.'''
        firstLength = p.get(1)
        if firstLength<=127:
            return firstLength
        else:
            lengthLength = firstLength & 0x7F
            return p.get(lengthLength)

###
### PDF parsing routines from pdfminer, with changes for EBX_HANDLER

## Utilities
##
def choplist(n, seq):
    '''Groups every n elements of the list; a trailing partial group is dropped.'''
    r = []
    for x in seq:
        r.append(x)
        if len(r) == n:
            yield tuple(r)
            r = []
    return

def nunpack(s, default=0):
    '''
    Unpacks up to 4 bytes big endian.
    Returns `default` for an empty input and raises TypeError for more
    than 4 bytes.
    '''
    l = len(s)
    if not l:
        return default
    if l > 4:
        # The original *returned* the exception object, which callers
        # would then have used as if it were an integer.
        raise TypeError('invalid length: %d' % l)
    # Left-pad to 4 bytes so a single format covers every length.
    return struct.unpack('>L', b'\x00' * (4 - l) + bytes(s))[0]


STRICT = 0


def _byte_at(s, i):
    '''Return s[i] as a length-1 byte string on both Python 2 and 3.'''
    return s[i:i+1]

def _int_to_byte(n):
    '''Return the length-1 byte string for the integer n (0..255).'''
    return bytes(bytearray((n,)))

def _to_text(name):
    '''
    Decode a raw symbol name to text. UTF-8 is tried first; Latin-1 is
    the fallback so that arbitrary bytes never make interning fail.
    '''
    if isinstance(name, bytes):
        try:
            return name.decode('utf-8')
        except UnicodeDecodeError:
            return name.decode('latin-1')
    return name


## PS Exceptions
##
class PSException(Exception): pass
class PSEOF(PSException): pass
class PSSyntaxError(PSException): pass
class PSTypeError(PSException): pass
class PSValueError(PSException): pass


## Basic PostScript Types
##

# PSLiteral
class PSObject(object): pass

class PSLiteral(PSObject):
    '''
    PS literals (e.g. "/Name").
    Caution: Never create these objects directly.
    Use PSLiteralTable.intern() instead.
    '''
    def __init__(self, name):
        # Store text, like PSKeyword does: dictionaries built in
        # PSStackParser.nextobject() are keyed by literal_name() and are
        # looked up with text keys ('Length', 'Filter', ...).
        self.name = _to_text(name)
        return

    def __repr__(self):
        name = []
        for char in self.name:
            if not char.isalnum():
                char = '#%02x' % ord(char)
            name.append(char)
        return '/%s' % ''.join(name)

# PSKeyword
class PSKeyword(PSObject):
    '''
    PS keywords (e.g. "showpage").
    Caution: Never create these objects directly.
    Use PSKeywordTable.intern() instead.
    '''
    def __init__(self, name):
        self.name = _to_text(name)
        return

    def __repr__(self):
        return self.name

# PSSymbolTable
class PSSymbolTable(object):

    '''
    Symbol table that stores PSLiteral or PSKeyword.
    Interning guarantees one object per name, so symbols can be compared
    with "is".
    '''

    def __init__(self, classe):
        self.dic = {}
        self.classe = classe
        return

    def intern(self, name):
        '''Return the unique symbol for `name`, creating it on first use.'''
        try:
            return self.dic[name]
        except KeyError:
            lit = self.dic[name] = self.classe(name)
            return lit

PSLiteralTable = PSSymbolTable(PSLiteral)
PSKeywordTable = PSSymbolTable(PSKeyword)
LIT = PSLiteralTable.intern
KWD = PSKeywordTable.intern
KEYWORD_BRACE_BEGIN = KWD(b'{')
KEYWORD_BRACE_END = KWD(b'}')
KEYWORD_ARRAY_BEGIN = KWD(b'[')
KEYWORD_ARRAY_END = KWD(b']')
KEYWORD_DICT_BEGIN = KWD(b'<<')
KEYWORD_DICT_END = KWD(b'>>')


def literal_name(x):
    '''Return the name of a PSLiteral; str(x) for anything else unless STRICT.'''
    if not isinstance(x, PSLiteral):
        if STRICT:
            raise PSTypeError('Literal required: %r' % x)
        else:
            return str(x)
    return x.name

def keyword_name(x):
    '''Return the name of a PSKeyword; str(x) for anything else unless STRICT.'''
    if not isinstance(x, PSKeyword):
        if STRICT:
            raise PSTypeError('Keyword required: %r' % x)
        else:
            return str(x)
    return x.name


## PSBaseParser
##
EOL = re.compile(br'[\r\n]')
SPC = re.compile(br'\s')
NONSPC = re.compile(br'\S')
HEX = re.compile(br'[0-9a-fA-F]')
END_LITERAL = re.compile(br'[#/%\[\]()<>{}\s]')
END_HEX_STRING = re.compile(br'[^\s0-9a-fA-F]')
HEX_PAIR = re.compile(br'[0-9a-fA-F]{2}|.')
END_NUMBER = re.compile(br'[^0-9]')
END_KEYWORD = re.compile(br'[#/%\[\]()<>{}\s]')
END_STRING = re.compile(br'[()\\]')
OCT_STRING = re.compile(br'[0-7]')
ESC_STRING = { b'b':8, b't':9, b'n':10, b'f':12, b'r':13, b'(':40, b')':41, b'\\':92 }

class EmptyArrayValue(object):
    '''Placeholder token emitted for "<>"; its text form is "<>".'''
    def __str__(self):
        return "<>"


class PSBaseParser(object):

    '''
    Most basic PostScript parser that performs only basic tokenization.

    The tokenizer is a state machine: every parse_* method receives the
    current buffer and an index and returns (next_state, next_index), so
    a token may span several buffer refills.
    '''
    BUFSIZ = 4096

    def __init__(self, fp):
        self.fp = fp
        self.seek(0)
        return

    def __repr__(self):
        return '<PSBaseParser: %r, bufpos=%d>' % (self.fp, self.bufpos)

    def flush(self):
        return

    def close(self):
        self.flush()
        return

    def tell(self):
        return self.bufpos+self.charpos

    def poll(self, pos=None, n=80):
        # Debug helper: the print is disabled, so this only seeks to
        # `pos` and back again.
        pos0 = self.fp.tell()
        if not pos:
            pos = self.bufpos+self.charpos
        self.fp.seek(pos)
        ##print >>sys.stderr, 'poll(%d): %r' % (pos, self.fp.read(n))
        self.fp.seek(pos0)
        return

    def seek(self, pos):
        '''
        Seeks the parser to the given position.
        '''
        self.fp.seek(pos)
        # reset the status for nextline()
        self.bufpos = pos
        self.buf = b''
        self.charpos = 0
        # reset the status for nexttoken()
        self.parse1 = self.parse_main
        self.tokens = []
        return

    def fillbuf(self):
        '''Load the next chunk once the buffer is used up; PSEOF at end of file.'''
        if self.charpos < len(self.buf): return
        # fetch next chunk.
        self.bufpos = self.fp.tell()
        self.buf = self.fp.read(self.BUFSIZ)
        if not self.buf:
            raise PSEOF('Unexpected EOF')
        self.charpos = 0
        return

    def parse_main(self, s, i):
        '''Skip whitespace and dispatch on the first byte of the next token.'''
        m = NONSPC.search(s, i)
        if not m:
            return (self.parse_main, len(s))
        j = m.start(0)
        c = _byte_at(s, j)
        self.tokenstart = self.bufpos+j
        if c == b'%':
            self.token = c
            return (self.parse_comment, j+1)
        if c == b'/':
            self.token = b''
            return (self.parse_literal, j+1)
        if c in b'-+' or c.isdigit():
            self.token = c
            return (self.parse_number, j+1)
        if c == b'.':
            self.token = c
            return (self.parse_decimal, j+1)
        if c.isalpha():
            self.token = c
            return (self.parse_keyword, j+1)
        if c == b'(':
            self.token = b''
            self.paren = 1
            return (self.parse_string, j+1)
        if c == b'<':
            self.token = b''
            return (self.parse_wopen, j+1)
        if c == b'>':
            self.token = b''
            return (self.parse_wclose, j+1)
        # Any other single byte ('[', ']', '{', '}', ...) is its own keyword.
        self.add_token(KWD(c))
        return (self.parse_main, j+1)

    def add_token(self, obj):
        self.tokens.append((self.tokenstart, obj))
        return

    def parse_comment(self, s, i):
        m = EOL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_comment, len(s))
        j = m.start(0)
        self.token += s[i:j]
        # We ignore comments.
        #self.tokens.append(self.token)
        return (self.parse_main, j)

    def parse_literal(self, s, i):
        m = END_LITERAL.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_literal, len(s))
        j = m.start(0)
        self.token += s[i:j]
        if _byte_at(s, j) == b'#':
            # '#xx' hex escape inside a name
            self.hex = b''
            return (self.parse_literal_hex, j+1)
        self.add_token(LIT(self.token))
        return (self.parse_main, j)

    def parse_literal_hex(self, s, i):
        c = _byte_at(s, i)
        if HEX.match(c) and len(self.hex) < 2:
            self.hex += c
            return (self.parse_literal_hex, i+1)
        if self.hex:
            self.token += _int_to_byte(int(self.hex, 16))
        return (self.parse_literal, i)


    def parse_number(self, s, i):
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_number, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = _byte_at(s, j)
        if c == b'.':
            self.token += c
            return (self.parse_decimal, j+1)
        try:
            self.add_token(int(self.token))
        except ValueError:
            # e.g. a lone sign: not a number, silently dropped
            pass
        return (self.parse_main, j)

    def parse_decimal(self, s, i):
        m = END_NUMBER.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_decimal, len(s))
        j = m.start(0)
        self.token += s[i:j]
        try:
            self.add_token(Decimal(self.token.decode('utf-8')))
        except (ArithmeticError, ValueError):
            # e.g. a lone '.': dropped, mirroring parse_number().
            # decimal.InvalidOperation derives from ArithmeticError.
            pass
        return (self.parse_main, j)


    def parse_keyword(self, s, i):
        m = END_KEYWORD.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_keyword, len(s))
        j = m.start(0)
        self.token += s[i:j]
        # self.token is a byte string; comparing it against the text
        # 'true'/'false' never matches on Python 3.
        if self.token == b'true':
            token = True
        elif self.token == b'false':
            token = False
        else:
            token = KWD(self.token)
        self.add_token(token)
        return (self.parse_main, j)

    def parse_string(self, s, i):
        m = END_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_string, len(s))
        j = m.start(0)
        self.token += s[i:j]
        c = _byte_at(s, j)
        if c == b'\\':
            # Must be bytes: parse_string_1() appends byte strings to it.
            self.oct = b''
            return (self.parse_string_1, j+1)
        if c == b'(':
            self.paren += 1
            self.token += c
            return (self.parse_string, j+1)
        if c == b')':
            self.paren -= 1
            if self.paren:
                # a balanced inner ')' is part of the string
                self.token += c
                return (self.parse_string, j+1)
        self.add_token(self.token)
        return (self.parse_main, j+1)


    def parse_string_1(self, s, i):
        '''Handle the byte(s) following a backslash inside a (...) string.'''
        c = _byte_at(s, i)
        if OCT_STRING.match(c) and len(self.oct) < 3:
            self.oct += c
            return (self.parse_string_1, i+1)
        if self.oct:
            # High-order overflow of a three-digit octal escape is
            # ignored (PDF 32000-1, 7.3.4.2) rather than raising.
            self.token += _int_to_byte(int(self.oct, 8) & 0xFF)
            return (self.parse_string, i)
        if c in ESC_STRING:
            self.token += _int_to_byte(ESC_STRING[c])
        # An unknown escaped byte is consumed and dropped.
        return (self.parse_string, i+1)

    def parse_wopen(self, s, i):
        '''After '<': hex string, '<<' dictionary start, or the empty '<>'.'''
        c = _byte_at(s, i)
        if c.isspace() or HEX.match(c):
            return (self.parse_hexstring, i)
        if c == b'<':
            self.add_token(KEYWORD_DICT_BEGIN)
            i += 1
        if c == b'>':
            # Empty array without any contents. Why though?
            # We need to add some dummy python object that will serialize to
            # nothing, otherwise the code removes the whole array.
            self.add_token(EmptyArrayValue())
            i += 1

        return (self.parse_main, i)

    def parse_wclose(self, s, i):
        '''After '>': '>>' ends a dictionary; a single '>' is discarded.'''
        if _byte_at(s, i) == b'>':
            self.add_token(KEYWORD_DICT_END)
            i += 1
        return (self.parse_main, i)

    def parse_hexstring(self, s, i):
        m = END_HEX_STRING.search(s, i)
        if not m:
            self.token += s[i:]
            return (self.parse_hexstring, len(s))
        j = m.start(0)
        self.token += s[i:j]
        # Strip whitespace, then turn every hex pair (or a trailing
        # single digit) into one byte.
        token = HEX_PAIR.sub(lambda m: _int_to_byte(int(m.group(0), 16)),
                             SPC.sub(b'', self.token))
        self.add_token(token)
        return (self.parse_main, j)

    def nexttoken(self):
        '''Return the next (position, token) pair; raises PSEOF at end of input.'''
        while not self.tokens:
            self.fillbuf()
            (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos)
        token = self.tokens.pop(0)
        return token

    def nextline(self):
        '''
        Fetches a next line that ends either with \\r or \\n.
        Returns (position, line); the line keeps its terminator and
        '\\r\\n' counts as a single terminator.
        '''
        linebuf = b''
        linepos = self.bufpos + self.charpos
        eol = False
        while 1:
            self.fillbuf()
            if eol:
                c = _byte_at(self.buf, self.charpos)
                # handle '\r\n'
                if c == b'\n':
                    linebuf += c
                    self.charpos += 1
                break
            m = EOL.search(self.buf, self.charpos)
            if m:
                linebuf += self.buf[self.charpos:m.end(0)]
                self.charpos = m.end(0)
                if linebuf[-1:] == b'\r':
                    # peek at the next byte: it may be the '\n' of '\r\n'
                    eol = True
                else:
                    break
            else:
                linebuf += self.buf[self.charpos:]
                self.charpos = len(self.buf)
        return (linepos, linebuf)

    def revreadlines(self):
        '''
        Fetches a next line backward. This is used to locate
        the trailers at the end of a file.
        '''
        self.fp.seek(0, 2)
        pos = self.fp.tell()
        buf = b''
        while 0 < pos:
            prevpos = pos
            pos = max(0, pos-self.BUFSIZ)
            self.fp.seek(pos)
            s = self.fp.read(prevpos-pos)
            if not s: break
            while 1:
                n = max(s.rfind(b'\r'), s.rfind(b'\n'))
                if n == -1:
                    # no terminator left in this chunk: carry it over
                    buf = s + buf
                    break
                yield s[n:]+buf
                s = s[:n]
                buf = b''
        return


## PSStackParser
##

# Token types that nextobject() pushes onto the operand stack unchanged.
_PLAIN_TOKEN_TYPES = (int, Decimal, bool, bytearray, bytes, str, PSLiteral)

class PSStackParser(PSBaseParser):
    '''
    Tokenizer plus an operand stack that assembles arrays and
    dictionaries. Subclasses override do_keyword() to act on keywords.
    '''

    def __init__(self, fp):
        PSBaseParser.__init__(self, fp)
        self.reset()
        return

    def reset(self):
        self.context = []
        self.curtype = None
        self.curstack = []
        self.results = []
        return

    def seek(self, pos):
        PSBaseParser.seek(self, pos)
        self.reset()
        return

    def push(self, *objs):
        self.curstack.extend(objs)
        return
    def pop(self, n):
        objs = self.curstack[-n:]
        self.curstack[-n:] = []
        return objs
    def popall(self):
        objs = self.curstack
        self.curstack = []
        return objs
    def add_results(self, *objs):
        self.results.extend(objs)
        return

    def start_type(self, pos, type):
        '''Open a nested container, saving the enclosing stack.'''
        self.context.append((pos, self.curtype, self.curstack))
        (self.curtype, self.curstack) = (type, [])
        return
    def end_type(self, type):
        '''Close the current container; returns (start position, items).'''
        if self.curtype != type:
            raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type))
        objs = [ obj for (_,obj) in self.curstack ]
        (pos, self.curtype, self.curstack) = self.context.pop()
        return (pos, objs)

    def do_keyword(self, pos, token):
        return

    def nextobject(self, direct=False):
        '''
        Yields a list of objects: keywords, literals, strings (byte arrays),
        numbers, arrays and dictionaries. Arrays and dictionaries
        are represented as Python sequence and dictionaries.

        With direct=True the first complete top-level object is returned
        as a (position, object) pair straight from the stack.
        '''
        while not self.results:
            (pos, token) = self.nexttoken()
            if isinstance(token, _PLAIN_TOKEN_TYPES):
                # normal token
                self.push((pos, token))
            elif token == KEYWORD_ARRAY_BEGIN:
                # begin array
                self.start_type(pos, 'a')
            elif token == KEYWORD_ARRAY_END:
                # end array
                try:
                    self.push(self.end_type('a'))
                except PSTypeError:
                    if STRICT: raise
            elif token == KEYWORD_DICT_BEGIN:
                # begin dictionary
                self.start_type(pos, 'd')
            elif token == KEYWORD_DICT_END:
                # end dictionary
                try:
                    (pos, objs) = self.end_type('d')
                    if len(objs) % 2 != 0:
                        print("Incomplete dictionary construct")
                        objs.append("") # this isn't necessary.
                        # temporary fix. is this due to rental books?
                        # raise PSSyntaxError(
                        #     'Invalid dictionary construct: %r' % objs)
                    d = dict((literal_name(k), v) \
                                 for (k,v) in choplist(2, objs))
                    self.push((pos, d))
                except PSTypeError:
                    if STRICT: raise
            else:
                self.do_keyword(pos, token)
            if self.context:
                # still inside an array or dictionary
                continue
            else:
                if direct:
                    return self.pop(1)[0]
                self.flush()
        obj = self.results.pop(0)
        return obj


LITERAL_CRYPT = LIT(b'Crypt')
LITERALS_FLATE_DECODE = (LIT(b'FlateDecode'), LIT(b'Fl'))
LITERALS_LZW_DECODE = (LIT(b'LZWDecode'), LIT(b'LZW'))
LITERALS_ASCII85_DECODE = (LIT(b'ASCII85Decode'), LIT(b'A85'))


## PDF Objects
##
class PDFObject(PSObject): pass

class PDFException(PSException): pass
class PDFTypeError(PDFException): pass
class PDFValueError(PDFException): pass
class PDFNotImplementedError(PSException): pass


## PDFObjRef
##
class PDFObjRef(PDFObject):
    '''Indirect reference ("objid genno R") resolved lazily through `doc`.'''

    def __init__(self, doc, objid, genno):
        if objid == 0:
            if STRICT:
                raise PDFValueError('PDF object id cannot be 0.')
        self.doc = doc
        self.objid = objid
        self.genno = genno
        return

    def __repr__(self):
        return '<PDFObjRef:%d %d>' % (self.objid, self.genno)

    def resolve(self):
        return self.doc.getobj(self.objid)


# resolve
def resolve1(x):
    '''
    Resolve an object. If this is an array or dictionary,
    it may still contains some indirect objects inside.
    '''
    while isinstance(x, PDFObjRef):
        x = x.resolve()
    return x

def resolve_all(x):
    '''
    Recursively resolve X and all the internals.
    Make sure there is no indirect reference within the nested object.
    This procedure might be slow.
    Lists are rebuilt; dictionaries are updated in place.
    '''
    while isinstance(x, PDFObjRef):
        x = x.resolve()
    if isinstance(x, list):
        x = [ resolve_all(v) for v in x ]
    elif isinstance(x, dict):
        # items() instead of the Python-2-only iteritems(); the snapshot
        # keeps the iteration independent of the assignments below.
        for (k,v) in list(x.items()):
            x[k] = resolve_all(v)
    return x

def decipher_all(decipher, objid, genno, x):
    '''
    Recursively decipher X.
    '''
    if isinstance(x, (bytearray, bytes, str)):
        return decipher(objid, genno, x)
    decf = lambda v: decipher_all(decipher, objid, genno, v)
    if isinstance(x, list):
        x = [decf(v) for v in x]
    elif isinstance(x, dict):
        x = dict((k, decf(v)) for (k, v) in x.items())
    return x


# Type checking
#
# Each *_value() helper resolves indirect references first. On a type
# mismatch it raises PDFTypeError when STRICT is set and otherwise
# returns an empty/zero default.
def int_value(x):
    x = resolve1(x)
    if not isinstance(x, int):
        if STRICT:
            raise PDFTypeError('Integer required: %r' % x)
        return 0
    return x

def decimal_value(x):
    x = resolve1(x)
    if not isinstance(x, Decimal):
        if STRICT:
            raise PDFTypeError('Decimal required: %r' % x)
        return 0.0
    return x

def num_value(x):
    x = resolve1(x)
    if not isinstance(x, (int, Decimal)):
        if STRICT:
            raise PDFTypeError('Int or Decimal required: %r' % x)
        return 0
    return x

def str_value(x):
    x = resolve1(x)
    if not isinstance(x, (bytearray, bytes, str)):
        if STRICT:
            raise PDFTypeError('String required: %r' % x)
        return ''
    return x

def list_value(x):
    x = resolve1(x)
    if not isinstance(x, (list, tuple)):
        if STRICT:
            raise PDFTypeError('List required: %r' % x)
        return []
    return x

def dict_value(x):
    x = resolve1(x)
    if not isinstance(x, dict):
        if STRICT:
            raise PDFTypeError('Dict required: %r' % x)
        return {}
    return x

def stream_value(x):
    x = resolve1(x)
    if not isinstance(x, PDFStream):
        if STRICT:
            raise PDFTypeError('PDFStream required: %r' % x)
        return PDFStream({}, b'')
    return x

# ascii85decode(data)
def ascii85decode(data):
    '''
    Decode ASCII85 data up to the '~' end marker (or the end of input).
    Bytes outside '!'..'u' other than 'z' and '~' are skipped.
    '''
    n = b = 0
    out = b''
    # Iterate integer byte values: that is what a Python 3 bytes object
    # yields, and bytearray() provides the same on Python 2.
    for c in bytearray(data):
        if 33 <= c <= 117:          # '!' .. 'u'
            n += 1
            b = b*85+(c-33)
            if n == 5:
                out += struct.pack('>L',b)
                n = b = 0
        elif c == 122:              # 'z': shorthand for four zero bytes
            assert n == 0
            out += b'\0\0\0\0'
        elif c == 126:              # '~': end of data
            if n:
                # pad the final partial group with 'u' digits
                for _ in range(5-n):
                    b = b*85+84
                out += struct.pack('>L',b)[:n-1]
            break
    return out


## PDFStream type
class PDFStream(PDFObject):
    '''
    A stream object: its dictionary plus the raw (possibly encrypted and
    filtered) payload. The payload is decrypted/decoded on demand.
    '''
    def __init__(self, dic, rawdata, decipher=None):
        length = int_value(dic.get('Length', 0))
        eol = rawdata[length:]
        # quick and dirty fix for false length attribute,
        # might not work if the pdf stream parser has a problem
        if decipher is not None and \
                getattr(decipher, '__name__', None) == 'decrypt_aes':
            # AES payloads are cut down to a whole number of 16-byte blocks
            if (len(rawdata) % 16) != 0:
                cutdiv = len(rawdata) // 16
                rawdata = rawdata[:16*cutdiv]
        else:
            # only a trailing end-of-line beyond /Length is stripped
            if eol in (b'\r', b'\n', b'\r\n'):
                rawdata = rawdata[:length]

        self.dic = dic
        self.rawdata = rawdata
        self.decipher = decipher
        self.data = None
        self.decdata = None
        self.objid = None
        self.genno = None
        return

    def set_objid(self, objid, genno):
        self.objid = objid
        self.genno = genno
        return

    def __repr__(self):
        if self.rawdata:
            return '<PDFStream(%r): raw=%d, %r>' % \
                   (self.objid, len(self.rawdata), self.dic)
        else:
            return '<PDFStream(%r): data=%d, %r>' % \
                   (self.objid, len(self.data or b''), self.dic)

    def decode(self):
        '''
        Decrypt and de-filter rawdata into self.data; rawdata is released.
        Raises PDFNotImplementedError for unsupported filters/predictors.
        '''
        assert self.data is None and self.rawdata is not None
        data = self.rawdata
        if self.decipher:
            # Handle encryption
            data = self.decipher(self.objid, self.genno, data)
            if gen_xref_stm:
                self.decdata = data # keep decrypted data
        if 'Filter' not in self.dic:
            self.data = data
            self.rawdata = None
            ##print self.dict
            return
        filters = self.dic['Filter']
        if not isinstance(filters, list):
            filters = [ filters ]
        for f in filters:
            if f in LITERALS_FLATE_DECODE:
                # will get errors if the document is encrypted.
                data = zlib.decompress(data)
            elif f in LITERALS_LZW_DECODE:
                # LZWDecoder is defined elsewhere in this file.
                # NOTE(review): assumes run() yields byte strings - confirm.
                data = b''.join(LZWDecoder(BytesIO(data)).run())
            elif f in LITERALS_ASCII85_DECODE:
                data = ascii85decode(data)
            elif f == LITERAL_CRYPT:
                raise PDFNotImplementedError('/Crypt filter is unsupported')
            else:
                raise PDFNotImplementedError('Unsupported filter: %r' % f)
        # apply predictors (once, to the fully de-filtered data)
        if 'DP' in self.dic:
            params = self.dic['DP']
        else:
            params = self.dic.get('DecodeParms', {})
        # dict_value() resolves an indirect reference and maps anything
        # that is not a dictionary to {} (STRICT is off by default).
        params = dict_value(params)
        if 'Predictor' in params:
            pred = int_value(params['Predictor'])
            if pred:
                if pred != 12:
                    raise PDFNotImplementedError(
                        'Unsupported predictor: %r' % pred)
                if 'Columns' not in params:
                    raise PDFValueError(
                        'Columns undefined for predictor=12')
                columns = int_value(params['Columns'])
                buf = bytearray()
                ent0 = bytearray(columns)
                # Each row is one tag byte followed by `columns` bytes.
                for i in range(0, len(data), columns+1):
                    tag = data[i:i+1]
                    ent1 = bytearray(data[i+1:i+1+columns])
                    if tag == b'\x02':
                        # tag 2: add the previous decoded row, byte-wise
                        ent1 = bytearray((a+b) & 255
                                         for (a,b) in zip(ent0,ent1))
                    buf += ent1
                    ent0 = ent1
                data = bytes(buf)
        self.data = data
        self.rawdata = None
        return

    def get_data(self):
        '''Return the decoded payload, decoding on first use.'''
        if self.data is None:
            self.decode()
        return self.data

    def get_rawdata(self):
        return self.rawdata

    def get_decdata(self):
        '''Return the decrypted but still filtered payload.'''
        if self.decdata is not None:
            return self.decdata
        data = self.rawdata
        if self.decipher and data:
            # Handle encryption
            data = self.decipher(self.objid, self.genno, data)
        return data


## PDF Exceptions
##
class PDFSyntaxError(PDFException): pass
class PDFNoValidXRef(PDFSyntaxError): pass
class PDFEncryptionError(PDFException): pass
class PDFPasswordIncorrect(PDFEncryptionError): pass

# some predefined literals and keywords.
LITERAL_OBJSTM = LIT(b'ObjStm')
LITERAL_XREF = LIT(b'XRef')
LITERAL_PAGE = LIT(b'Page')
LITERAL_PAGES = LIT(b'Pages')
LITERAL_CATALOG = LIT(b'Catalog')


## XRefs
##

## PDFXRef
##
class PDFXRef(object):
    '''
    Classic cross-reference table ("xref" ... "trailer").
    After load(), self.offsets maps objid -> (genno, file position) for
    in-use entries and self.trailer holds the trailer dictionary.
    '''

    def __init__(self):
        self.offsets = None
        return

    def __repr__(self):
        return '<PDFXRef: objs=%d>' % len(self.offsets or ())

    def objids(self):
        return iter(self.offsets.keys())

    def load(self, parser):
        '''
        Read the subsections that follow the "xref" line, then the
        trailer. Raises PDFNoValidXRef on any malformed input.
        '''
        self.offsets = {}
        while 1:
            try:
                (pos, line) = parser.nextline()
            except PSEOF:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
            if not line:
                raise PDFNoValidXRef('Premature eof: %r' % parser)
            if line.startswith(b'trailer'):
                # rewind so load_trailer() sees the keyword itself
                parser.seek(pos)
                break
            # subsection header: "<first objid> <number of entries>"
            f = line.split()
            if len(f) != 2:
                raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
            try:
                (start, nobjs) = map(int, f)
            except ValueError:
                raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
            for objid in range(start, start+nobjs):
                try:
                    (_, line) = parser.nextline()
                except PSEOF:
                    raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
                # entry: "<offset> <generation> <n|f>"
                f = line.split()
                if len(f) != 3:
                    raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
                (pos, genno, use) = f
                if use != b'n':
                    # free entry
                    continue
                try:
                    self.offsets[objid] = (int(genno), int(pos))
                except ValueError:
                    # Reported as PDFNoValidXRef like every other
                    # malformed-table case in this method.
                    raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
        self.load_trailer(parser)
        return

    KEYWORD_TRAILER = PSKeywordTable.intern(b'trailer')
    def load_trailer(self, parser):
        '''Parse "trailer << ... >>" and store the dictionary in self.trailer.'''
        try:
            (_,kwd) = parser.nexttoken()
            # An explicit check rather than an assert, which is stripped
            # when Python runs with -O.
            if kwd is not self.KEYWORD_TRAILER:
                raise PDFNoValidXRef('Trailer not found: %r' % parser)
            (_,dic) = parser.nextobject(direct=True)
        except PSEOF:
            # EOF right after the dictionary: take what is on the stack
            x = parser.pop(1)
            if not x:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted')
            (_,dic) = x[0]
        self.trailer = dict_value(dic)
        return

    def getpos(self, objid):
        '''Return (None, file position) for objid; KeyError if unknown.'''
        (genno, pos) = self.offsets[objid]
        return (None, pos)


## PDFXRefStream
##
class PDFXRefStream(object):
    '''
    Cross-reference stream (PDF >= 1.5). Entries are fixed-width records
    whose three field widths come from the stream's /W array.
    '''

    def __init__(self):
        self.index = None
        self.data = None
        self.entlen = None
        self.fl1 = self.fl2 = self.fl3 = None
        return

    def __repr__(self):
        return '<PDFXRef: objids=%s>' % self.index

    def objids(self):
        for first, size in self.index:
            for objid in range(first, first + size):
                yield objid

    def load(self, parser, debug=0):
        '''
        Read "objid genno obj <stream>" at the parser's position.
        Raises PDFNoValidXRef when the object is not a /Type /XRef stream.
        `debug` is accepted but not used.
        '''
        (_,objid) = parser.nexttoken() # ignored
        (_,genno) = parser.nexttoken() # ignored
        (_,kwd) = parser.nexttoken()
        (_,stream) = parser.nextobject()
        # .get(): a stream without /Type is rejected with PDFNoValidXRef
        # instead of escaping as a KeyError.
        if not isinstance(stream, PDFStream) or \
               stream.dic.get('Type') is not LITERAL_XREF:
            raise PDFNoValidXRef('Invalid PDF stream spec.')
        size = stream.dic['Size']
        index = stream.dic.get('Index', (0,size))
        # /Index is a flat list of (first objid, count) pairs
        self.index = list(zip(itertools.islice(index, 0, None, 2),
                              itertools.islice(index, 1, None, 2)))
        (self.fl1, self.fl2, self.fl3) = stream.dic['W']
        self.data = stream.get_data()
        self.entlen = self.fl1+self.fl2+self.fl3
        self.trailer = stream.dic
        return

    def getpos(self, objid):
        '''
        Return (None, file position) for a plain object, or
        (object stream id, index in that stream) for a compressed one.
        Raises KeyError for unknown or free objects.
        '''
        offset = 0
        for first, size in self.index:
            if first <= objid and objid < (first + size):
                break
            offset += size
        else:
            raise KeyError(objid)
        i = self.entlen * ((objid - first) + offset)
        ent = self.data[i:i+self.entlen]
        # a zero-width type field defaults to type 1
        f1 = nunpack(ent[:self.fl1], 1)
        if f1 == 1:
            pos = nunpack(ent[self.fl1:self.fl1+self.fl2])
            return (None, pos)
        elif f1 == 2:
            objid = nunpack(ent[self.fl1:self.fl1+self.fl2])
            index = nunpack(ent[self.fl1+self.fl2:])
            return (objid, index)
        # this is a free object
        raise KeyError(objid)


## PDFDocument
##
## A PDFDocument object represents a PDF document.
## Since a PDF file is usually pretty big, normally it is not loaded
## at once. Rather it is parsed dynamically as processing goes.
## A PDF parser is associated with the document.
+## +class PDFDocument(object): + + def __init__(self): + self.xrefs = [] + self.objs = {} + self.parsed_objs = {} + self.root = None + self.catalog = None + self.parser = None + self.encryption = None + self.decipher = None + # dictionaries for fileopen + self.fileopen = {} + self.urlresult = {} + self.ready = False + return + + # set_parser(parser) + # Associates the document with an (already initialized) parser object. + def set_parser(self, parser): + if self.parser: + return + self.parser = parser + # The document is set to be temporarily ready during collecting + # all the basic information about the document, e.g. + # the header, the encryption information, and the access rights + # for the document. + self.ready = True + # Retrieve the information of each header that was appended + # (maybe multiple times) at the end of the document. + self.xrefs = parser.read_xref() + for xref in self.xrefs: + trailer = xref.trailer + if not trailer: continue + + # If there's an encryption info, remember it. + if 'Encrypt' in trailer: + #assert not self.encryption + try: + self.encryption = (list_value(trailer['ID']), + dict_value(trailer['Encrypt'])) + # fix for bad files + except: + self.encryption = (b'ffffffffffffffffffffffffffffffffffff', + dict_value(trailer['Encrypt'])) + if 'Root' in trailer: + self.set_root(dict_value(trailer['Root'])) + break + else: + raise PDFSyntaxError('No /Root object! - Is this really a PDF?') + # The document is set to be non-ready again, until all the + # proper initialization (asking the password key and + # verifying the access permission, so on) is finished. + self.ready = False + return + + # set_root(root) + # Set the Root dictionary of the document. + # Each PDF file must have exactly one /Root dictionary. 
+ def set_root(self, root): + self.root = root + self.catalog = dict_value(self.root) + if self.catalog.get('Type') is not LITERAL_CATALOG: + if STRICT: + raise PDFSyntaxError('Catalog not found!') + return + # initialize(password='') + # Perform the initialization with a given password. + # This step is mandatory even if there's no password associated + # with the document. + def initialize(self, password=''): + if not self.encryption: + self.is_printable = self.is_modifiable = self.is_extractable = True + self.ready = True + return + (docid, param) = self.encryption + type = literal_name(param['Filter']) + if type == 'Adobe.APS' or type == "Standard" or type == "EBX_HANDLER": + print("This script is just for FOPN encryption.") + print("For standard password PDFs or Adobe PDFs, use ineptpdy.py") + raise PDFEncryptionError("Not a FileOpen-encrypted file") + if type == 'FOPN_fLock': + # remove of unnecessairy password attribute + return self.initialize_fopn_flock(docid, param) + if type == 'FOPN_foweb': + # remove of unnecessairy password attribute + return self.initialize_fopn(docid, param) + raise PDFEncryptionError('Unknown filter: param=%r' % param) + + def initialize_and_return_filter(self): + if not self.encryption: + self.is_printable = self.is_modifiable = self.is_extractable = True + self.ready = True + return None + + (docid, param) = self.encryption + type = literal_name(param['Filter']) + return type + + + PASSWORD_PADDING = b'(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' 
\ + b'\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz' + + + + # fileopen support + def initialize_fopn_flock(self, docid, param): + raise ADEPTError('FOPN_fLock not supported, yet ...') + # debug mode processing + global DEBUG_MODE + global IVERSION + if DEBUG_MODE == True: + if os.access('.',os.W_OK) == True: + debugfile = open('ineptpdf-'+IVERSION+'-debug.txt','w') + else: + raise ADEPTError('Cannot write debug file, current directory is not writable') + self.is_printable = self.is_modifiable = self.is_extractable = True + # get parameters and add it to the fo dictionary + self.fileopen['V'] = int_value(param.get('V',2)) + # crypt base + (docid, param) = self.encryption + #rights = dict_value(param['Info']) + rights = param['Info'] + #print rights + if DEBUG_MODE == True: debugfile.write(rights + '\n\n') +## for pair in rights.split(';'): +## try: +## key, value = pair.split('=',1) +## self.fileopen[key] = value +## # fix for some misconfigured INFO variables +## except: +## pass +## kattr = { 'SVID': 'ServiceID', 'DUID': 'DocumentID', 'I3ID': 'Ident3ID', \ +## 'I4ID': 'Ident4ID', 'VERS': 'EncrVer', 'PRID': 'USR'} +## for keys in kattr: +## try: +## self.fileopen[kattr[keys]] = self.fileopen[keys] +## del self.fileopen[keys] +## except: +## continue + # differentiate OS types +## sysplatform = sys.platform +## # if ostype is Windows +## if sysplatform=='win32': +## self.osuseragent = 'Windows NT 6.0' +## self.get_macaddress = self.get_win_macaddress +## self.fo_sethwids = self.fo_win_sethwids +## self.BrowserCookie = WinBrowserCookie +## elif sysplatform=='linux2': +## adeptout = 'Linux is not supported, yet.\n' +## raise ADEPTError(adeptout) +## self.osuseragent = 'Linux i686' +## self.get_macaddress = self.get_linux_macaddress +## self.fo_sethwids = self.fo_linux_sethwids +## else: +## adeptout = '' +## adeptout = adeptout + 'Due to various privacy violations from Apple\n' +## adeptout = adeptout + 'Mac OS X support is disabled by default.' 
+## raise ADEPTError(adeptout) +## # add static arguments for http/https request +## self.fo_setattributes() +## # add hardware specific arguments for http/https request +## self.fo_sethwids() +## +## if 'Code' in self.urlresult: +## if self.fileopen['Length'] == len(self.urlresult['Code']): +## self.decrypt_key = self.urlresult['Code'] +## else: +## self.decrypt_key = self.urlresult['Code'].decode('hex') +## else: +## raise ADEPTError('Cannot find decryption key.') + self.decrypt_key = 'stuff' + self.genkey = self.genkey_v2 + self.decipher = self.decrypt_rc4 + self.ready = True + return + + def initialize_fopn(self, docid, param): + # debug mode processing + global DEBUG_MODE + global IVERSION + if DEBUG_MODE == True: + if os.access('.',os.W_OK) == True: + debugfile = open('ineptpdf-'+IVERSION+'-debug.txt','w') + else: + raise ADEPTError('Cannot write debug file, current directory is not writable') + self.is_printable = self.is_modifiable = self.is_extractable = True + # get parameters and add it to the fo dictionary + self.fileopen['Length'] = int_value(param.get('Length', 0)) / 8 + self.fileopen['VEID'] = str_value(param.get('VEID')) + self.fileopen['BUILD'] = str_value(param.get('BUILD')) + self.fileopen['SVID'] = str_value(param.get('SVID')) + self.fileopen['DUID'] = str_value(param.get('DUID')) + self.fileopen['V'] = int_value(param.get('V',2)) + # crypt base + rights = str_value(param.get('INFO')).decode('base64') + rights = self.genkey_fileopeninfo(rights) + if DEBUG_MODE == True: debugfile.write(rights + '\n\n') + for pair in rights.split(';'): + try: + key, value = pair.split('=',1) + self.fileopen[key] = value + # fix for some misconfigured INFO variables + except: + pass + kattr = { 'SVID': 'ServiceID', 'DUID': 'DocumentID', 'I3ID': 'Ident3ID', \ + 'I4ID': 'Ident4ID', 'VERS': 'EncrVer', 'PRID': 'USR'} + for keys in kattr: + # fishing some misconfigured slashs out of it + try: + self.fileopen[kattr[keys]] = urllib.quote(self.fileopen[keys],safe='') + del 
self.fileopen[keys] + except: + continue + # differentiate OS types + sysplatform = sys.platform + # if ostype is Windows + if sysplatform=='win32': + self.osuseragent = 'Windows NT 6.0' + self.get_macaddress = self.get_win_macaddress + self.fo_sethwids = self.fo_win_sethwids + self.BrowserCookie = WinBrowserCookie + elif sysplatform=='linux2': + adeptout = 'Linux is not supported, yet.\n' + raise ADEPTError(adeptout) + self.osuseragent = 'Linux i686' + self.get_macaddress = self.get_linux_macaddress + self.fo_sethwids = self.fo_linux_sethwids + else: + adeptout = '' + adeptout = adeptout + 'Mac OS X is not supported, yet.' + adeptout = adeptout + 'Read the blogs FAQs for more information' + raise ADEPTError(adeptout) + # add static arguments for http/https request + self.fo_setattributes() + # add hardware specific arguments for http/https request + self.fo_sethwids() + #if DEBUG_MODE == True: debugfile.write(self.fileopen) + if 'UURL' in self.fileopen: + buildurl = self.fileopen['UURL'] + else: + buildurl = self.fileopen['PURL'] + # fix for bad DPRM structure + if self.fileopen['DPRM'][0] != r'/': + self.fileopen['DPRM'] = r'/' + self.fileopen['DPRM'] + # genius fix for bad server urls (IMHO) + if '?' in self.fileopen['DPRM']: + buildurl = buildurl + self.fileopen['DPRM'] + '&' + else: + buildurl = buildurl + self.fileopen['DPRM'] + '?' 
+ + # debug customization + #self.fileopen['Machine'] = '' + #self.fileopen['Disk'] = '' + + + surl = ( 'Stamp', 'Mode', 'USR', 'ServiceID', 'DocumentID',\ + 'Ident3ID', 'Ident4ID','DocStrFmt', 'OSType', 'OSName', 'OSData', 'Language',\ + 'LngLCID', 'LngRFC1766', 'LngISO4Char', 'Build', 'ProdVer', 'EncrVer',\ + 'Machine', 'Disk', 'Uuid', 'PrevMach', 'PrevDisk',\ + 'FormHFT',\ + 'SelServer', 'AcroVersion', 'AcroProduct', 'AcroReader',\ + 'AcroCanEdit', 'AcroPrefIDib', 'InBrowser', 'CliAppName',\ + 'DocIsLocal', 'DocPathUrl', 'VolName', 'VolType', 'VolSN',\ + 'FSName', 'FowpKbd', 'OSBuild',\ + 'RequestSchema') + + #settings request and special modes + if 'EVER' in self.fileopen and float(self.fileopen['EVER']) < 3.8: + self.fileopen['Mode'] = 'ICx' + + origurl = buildurl + buildurl = buildurl + 'Request=Setting' + for keys in surl: + try: + buildurl = buildurl + '&' + keys + '=' + self.fileopen[keys] + except: + continue + if DEBUG_MODE == True: debugfile.write( 'settings url:\n') + if DEBUG_MODE == True: debugfile.write( buildurl+'\n\n') + # custom user agent identification? 
+ if 'AGEN' in self.fileopen: + useragent = self.fileopen['AGEN'] + urllib.URLopener.version = useragent + # attribute doesn't exist - take the default user agent + else: + urllib.URLopener.version = self.osuseragent + # try to open the url + try: + u = urllib.urlopen(buildurl) + u.geturl() + result = u.read() + except: + raise ADEPTError('No internet connection or a blocking firewall!') +## finally: +## u.close() + # getting rid of the line feed + if DEBUG_MODE == True: debugfile.write('Settings'+'\n') + if DEBUG_MODE == True: debugfile.write(result+'\n\n') + #get rid of unnecessary characters + result = result.rstrip('\n') + result = result.rstrip(chr(13)) + result = result.lstrip('\n') + result = result.lstrip(chr(13)) + self.surlresult = {} + for pair in result.split('&'): + try: + key, value = pair.split('=',1) + # fix for bad server response + if key not in self.surlresult: + self.surlresult[key] = value + except: + pass + if 'RequestSchema' in self.surlresult: + self.fileopen['RequestSchema'] = self.surlresult['RequestSchema'] + if 'ServerSessionData' in self.surlresult: + self.fileopen['ServerSessionData'] = self.surlresult['ServerSessionData'] + if 'SetScope' in self.surlresult: + self.fileopen['RequestSchema'] = self.surlresult['SetScope'] + #print self.surlresult + if 'RetVal' in self.surlresult and 'SEMO' not in self.fileopen and(('Reason' in self.surlresult and \ + self.surlresult['Reason'] == 'AskUnp') or ('SetTarget' in self.surlresult and\ + self.surlresult['SetTarget'] == 'UnpDlg')): + # get user and password dialog + try: + self.gen_pw_dialog(self.surlresult['UnpUiName'], self.surlresult['UnpUiPass'],\ + self.surlresult['UnpUiTitle'], self.surlresult['UnpUiOk'],\ + self.surlresult['UnpUiSunk'], self.surlresult['UnpUiComm']) + except: + self.gen_pw_dialog() + + # the fileopen check might not be always right because of strange server responses + if 'SEMO' in self.fileopen and (self.fileopen['SEMO'] == '1'\ + or self.fileopen['SEMO'] == '2') and 
('CSES' in self.fileopen and\ + self.fileopen['CSES'] != 'fileopen'): + # get the url name for the cookie(s) + if 'CURL' in self.fileopen: + self.surl = self.fileopen['CURL'] + if 'CSES' in self.fileopen: + self.cses = self.fileopen['CSES'] + elif 'PHOS' in self.fileopen: + self.surl = self.fileopen['PHOS'] + elif 'LHOS' in self.fileopen: + self.surl = self.fileopen['LHOS'] + else: + raise ADEPTError('unknown Cookie name.\n Check ineptpdf forum for further assistance') + self.pwfieldreq = 1 + # session cookie processing + if self.fileopen['SEMO'] == '1': + cookies = self.BrowserCookie() + #print self.cses + #print self.surl + csession = cookies.getcookie(self.cses,self.surl) + if csession != None: + self.fileopen['Session'] = csession + self.gui = False + # fallback + else: + self.pwtk = Tkinter.Tk() + self.pwtk.title('Ineptpdf8') + self.pwtk.minsize(150, 0) + infotxt1 = 'Get the session cookie key manually (Firefox step-by-step:\n'+\ + 'Start Firefox -> Tools -> Options -> Privacy -> Show Cookies\n'+\ + '-> Search for a cookie from ' + self.surl +' with the\n'+\ + 'name ' + self.cses +' and copy paste the content field in the\n'+\ + 'Session Content field. 
Remove possible spaces or new lines at the '+\ + 'end\n (cursor must be blinking right behind the last character)' + self.label0 = Tkinter.Label(self.pwtk, text=infotxt1) + self.label0.pack() + self.label1 = Tkinter.Label(self.pwtk, text="Session Content") + self.pwfieldreq = 0 + self.gui = True + # user cookie processing + elif self.fileopen['SEMO'] == '2': + cookies = self.BrowserCookie() + #print self.cses + #print self.surl + name = cookies.getcookie('name',self.surl) + passw = cookies.getcookie('pass',self.surl) + if name != None or passw != None: + self.fileopen['UserName'] = urllib.quote(name) + self.fileopen['UserPass'] = urllib.quote(passw) + self.gui = False + # fallback + else: + self.pwtk = Tkinter.Tk() + self.pwtk.title('Ineptpdf8') + self.pwtk.minsize(150, 0) + self.label1 = Tkinter.Label(self.pwtk, text="Username") + infotxt1 = 'Get the user cookie keys manually (Firefox step-by-step:\n'+\ + 'Start Firefox -> Tools -> Options -> Privacy -> Show Cookies\n'+\ + '-> Search for cookies from ' + self.surl +' with the\n'+\ + 'name name in the user field and copy paste the content field in the\n'+\ + 'username field. Do the same with the name pass in the password field).' 
+ self.label0 = Tkinter.Label(self.pwtk, text=infotxt1) + self.label0.pack() + self.pwfieldreq = 1 + self.gui = True +## else: +## self.pwtk = Tkinter.Tk() +## self.pwtk.title('Ineptpdf8') +## self.pwtk.minsize(150, 0) +## self.pwfieldreq = 0 +## self.label1 = Tkinter.Label(self.pwtk, text="Username") +## self.pwfieldreq = 1 +## self.gui = True + if self.gui == True: + self.un_entry = Tkinter.Entry(self.pwtk) + # cursor here + self.un_entry.focus() + self.label2 = Tkinter.Label(self.pwtk, text="Password") + self.pw_entry = Tkinter.Entry(self.pwtk, show="*") + self.button = Tkinter.Button(self.pwtk, text='Go for it!', command=self.fo_save_values) + # widget layout, stack vertical + self.label1.pack() + self.un_entry.pack() + # create a password label and field + if self.pwfieldreq == 1: + self.label2.pack() + self.pw_entry.pack() + self.button.pack() + self.pwtk.update() + # start the event loop + self.pwtk.mainloop() + + # original request + # drive through tupple for building the permission url + burl = ( 'Stamp', 'Mode', 'USR', 'ServiceID', 'DocumentID',\ + 'Ident3ID', 'Ident4ID','DocStrFmt', 'OSType', 'Language',\ + 'LngLCID', 'LngRFC1766', 'LngISO4Char', 'Build', 'ProdVer', 'EncrVer',\ + 'Machine', 'Disk', 'Uuid', 'PrevMach', 'PrevDisk', 'User', 'SaUser', 'SaSID',\ + # special security measures + 'HostIsDomain', 'PhysHostname', 'LogiHostname', 'SaRefDomain',\ + 'FormHFT', 'UserName', 'UserPass', 'Session', \ + 'SelServer', 'AcroVersion', 'AcroProduct', 'AcroReader',\ + 'AcroCanEdit', 'AcroPrefIDib', 'InBrowser', 'CliAppName',\ + 'DocIsLocal', 'DocPathUrl', 'VolName', 'VolType', 'VolSN',\ + 'FSName', 'ServerSessionData', 'FowpKbd', 'OSBuild', \ + 'DocumentSessionData', 'RequestSchema') + + buildurl = origurl + buildurl = buildurl + 'Request=DocPerm' + for keys in burl: + try: + buildurl = buildurl + '&' + keys + '=' + self.fileopen[keys] + except: + continue + if DEBUG_MODE == True: debugfile.write('1st url:'+'\n') + if DEBUG_MODE == True: 
debugfile.write(buildurl+'\n\n') + # custom user agent identification? + if 'AGEN' in self.fileopen: + useragent = self.fileopen['AGEN'] + urllib.URLopener.version = useragent + # attribute doesn't exist - take the default user agent + else: + urllib.URLopener.version = self.osuseragent + # try to open the url + try: + u = urllib.urlopen(buildurl) + u.geturl() + result = u.read() + except: + raise ADEPTError('No internet connection or a blocking firewall!') +## finally: +## u.close() + # getting rid of the line feed + if DEBUG_MODE == True: debugfile.write('1st preresult'+'\n') + if DEBUG_MODE == True: debugfile.write(result+'\n\n') + #get rid of unnecessary characters + result = result.rstrip('\n') + result = result.rstrip(chr(13)) + result = result.lstrip('\n') + result = result.lstrip(chr(13)) + self.urlresult = {} + for pair in result.split('&'): + try: + key, value = pair.split('=',1) + self.urlresult[key] = value + except: + pass +## if 'RequestSchema' in self.surlresult: +## self.fileopen['RequestSchema'] = self.urlresult['RequestSchema'] + #self.urlresult + #result[0:8] == 'RetVal=1') or (result[0:8] == 'RetVal=2'): + if ('RetVal' in self.urlresult and (self.urlresult['RetVal'] != '1' and \ + self.urlresult['RetVal'] != '2' and \ + self.urlresult['RetVal'] != 'Update' and \ + self.urlresult['RetVal'] != 'Answer')): + + if ('Reason' in self.urlresult and (self.urlresult['Reason'] == 'BadUserPwd'\ + or self.urlresult['Reason'] == 'AskUnp')) or ('SwitchTo' in self.urlresult\ + and (self.urlresult['SwitchTo'] == 'Dialog')): + if 'ServerSessionData' in self.urlresult: + self.fileopen['ServerSessionData'] = self.urlresult['ServerSessionData'] + if 'DocumentSessionData' in self.urlresult: + self.fileopen['DocumentSessionData'] = self.urlresult['DocumentSessionData'] + buildurl = origurl + buildurl = buildurl + 'Request=DocPerm' + self.gen_pw_dialog() + # password not found - fallback + for keys in burl: + try: + buildurl = buildurl + '&' + keys + '=' + 
self.fileopen[keys] + except: + continue + if DEBUG_MODE == True: debugfile.write( '2ndurl:') + if DEBUG_MODE == True: debugfile.write( buildurl+'\n\n') + # try to open the url + try: + u = urllib.urlopen(buildurl) + u.geturl() + result = u.read() + except: + raise ADEPTError('No internet connection or a blocking firewall!') + # getting rid of the line feed + if DEBUG_MODE == True: debugfile.write( '2nd preresult') + if DEBUG_MODE == True: debugfile.write( result+'\n\n') + #get rid of unnecessary characters + result = result.rstrip('\n') + result = result.rstrip(chr(13)) + result = result.lstrip('\n') + result = result.lstrip(chr(13)) + self.urlresult = {} + for pair in result.split('&'): + try: + key, value = pair.split('=',1) + self.urlresult[key] = value + except: + pass + # did it work? + if ('RetVal' in self.urlresult and (self.urlresult['RetVal'] != '1' and \ + self.urlresult['RetVal'] != '2' and + self.urlresult['RetVal'] != 'Update' and \ + self.urlresult['RetVal'] != 'Answer')): + raise ADEPTError('Decryption was not successfull.\nReason: ' + self.urlresult['Error']) + # fix for non-standard-conform fileopen pdfs +## if self.fileopen['Length'] != 5 and self.fileopen['Length'] != 16: +## if self.fileopen['V'] == 1: +## self.fileopen['Length'] = 5 +## else: +## self.fileopen['Length'] = 16 + # patch for malformed pdfs + #print len(self.urlresult['Code']) + #print self.urlresult['Code'].encode('hex') + if 'code' in self.urlresult: + self.urlresult['Code'] = self.urlresult['code'] + if 'Code' in self.urlresult: + if len(self.urlresult['Code']) == 5 or len(self.urlresult['Code']) == 16: + self.decrypt_key = self.urlresult['Code'] + else: + self.decrypt_key = self.urlresult['Code'].decode('hex') + else: + raise ADEPTError('Cannot find decryption key.') + + + + V = int_value(param.get('V',2)) + R = int_value(param.get('R')) + + + # genkey method + if V == 1 or V == 2 or V == 4: + self.genkey = self.genkey_v2 + elif V == 3: + self.genkey = self.genkey_v3 + elif V 
def gen_pw_dialog(self, Username='Username', Password='Password',
                  Title='User/Password Authentication', OK='Proceed',
                  Text1='Authorization', Text2='Enter Required Data'):
    """Show a Tk window asking for a user name and a password.

    All six arguments are display captions: ``Title`` is the window title,
    ``Text1`` and ``Text2`` are two heading labels, ``Username`` and
    ``Password`` label the two entry fields and ``OK`` is the button text.

    The widgets are stored on ``self`` (``pwtk``, ``label1``..``label4``,
    ``un_entry``, ``pw_entry``, ``button``); ``self.pwfieldreq`` is set to 1
    and ``self.gui`` to True.  The button is wired to
    ``self.fo_save_values``.  The call blocks inside the Tk main loop.
    """
    window = self.pwtk = Tkinter.Tk()
    window.title(Title)
    window.minsize(150, 0)

    # Widgets are created in the same order as they are stacked below.
    self.label1 = Tkinter.Label(window, text=Text1)
    self.label2 = Tkinter.Label(window, text=Text2)
    self.label3 = Tkinter.Label(window, text=Username)
    self.pwfieldreq = 1
    self.gui = True
    self.un_entry = Tkinter.Entry(window)
    # put the cursor into the user name field
    self.un_entry.focus()
    self.label4 = Tkinter.Label(window, text=Password)
    self.pw_entry = Tkinter.Entry(window, show="*")
    self.button = Tkinter.Button(window, text=OK, command=self.fo_save_values)

    # vertical layout: headings, user name, (password), button
    stacked = [self.label1, self.label2, self.label3, self.un_entry]
    if self.pwfieldreq == 1:
        stacked.extend((self.label4, self.pw_entry))
    stacked.append(self.button)
    for widget in stacked:
        widget.pack()

    window.update()
    # hand control to the event loop
    window.mainloop()
def get_win_macaddress(self):
    """Return the MAC address of the first network adapter as hex digits.

    Calls ``iphlpapi.GetAdaptersInfo`` and reads the six ``Address`` bytes
    of the first ``IP_ADAPTER_INFO`` record in the returned buffer.

    Returns:
        A 12-character lowercase hexadecimal string.

    Raises:
        ADEPTError: if the adapter list cannot be obtained.
    """
    try:
        gasize = c_ulong(5000)
        p = create_string_buffer(gasize.value)
        GetAdaptersInfo = windll.iphlpapi.GetAdaptersInfo
        status = GetAdaptersInfo(byref(p), byref(gasize))
        if status == 111:
            # ERROR_BUFFER_OVERFLOW: gasize now holds the required size,
            # so retry once with a buffer that is large enough.
            p = create_string_buffer(gasize.value)
            status = GetAdaptersInfo(byref(p), byref(gasize))
        if status != 0:
            raise ADEPTError('Cannot find MAC address. Get forum help.')
        # IP_ADAPTER_INFO layout: Next pointer, ComboIndex (4 bytes),
        # AdapterName[260], Description[132], AddressLength (4 bytes),
        # Address[8].  With 4-byte pointers this is the historical 0x194;
        # with 8-byte pointers the field sits four bytes further on.
        offset = struct.calcsize('P') + 4 + 260 + 132 + 4
        mac = binascii.hexlify(p.raw[offset:offset + 6])
        # bytes has no .encode('hex') on Python 3; hexlify works everywhere
        # and the result is converted to the native str type.
        if not isinstance(mac, str):
            mac = mac.decode('ascii')
        return mac
    except Exception:
        raise ADEPTError('Cannot find MAC address. Get forum help.')
regkey = winreg.OpenKey(cuser, FOW3_UUID) + userkey = winreg.QueryValueEx(regkey, 'Fowp3Uuid')[0] +# if self.genkey_cryptmach(userkey)[0:4] != 'ec20': + self.fileopen['Uuid'] = self.genkey_cryptmach(userkey)[4:] +## elif self.genkey_cryptmach(userkey)[0:4] != 'ec20': +## self.fileopen['Uuid'] = self.genkey_cryptmach(userkey,1)[4:] +## else: + except: + raise ADEPTError('Cannot find FowP3Uuid file - reason might be Adobe (Reader) X.'\ + 'Read the FAQs for more information how to solve the problem.') + else: + self.fileopen['Uuid'] = str(uuid.uuid1()) + # get time stamp + self.fileopen['Stamp'] = str(time.time())[:-3] + # get fileopen input pdf name + path + self.fileopen['DocPathUrl'] = 'file%3a%2f%2f%2f'\ + + urllib.quote(os.path.normpath(INPUTFILEPATH)) + # clear the link + #INPUTFILEPATH = '' +## # get volume name (urllib quote necessairy?) urllib.quote( +## self.fileopen['VolName'] = win32api.GetVolumeInformation("C:\\")[0] +## # get volume serial number +## self.fileopen['VolSN'] = str(win32api.GetVolumeInformation("C:\\")[1]) + return + + # Windows processing + def fo_win_sethwids(self): + # Windows specific attributes + self.fileopen['OSType']='Windows' + self.fileopen['OSName']='Vista' + self.fileopen['OSData']='Service%20Pack%204' + self.fileopen['AcroProduct']='Reader' + self.fileopen['AcroReader']='Yes' + self.fileopen['OSBuild']='7600' + self.fileopen['AcroVersion']='9.1024' + self.fileopen['Build']='879' + # write hardware keys + hwkey = 0 + # get the os type and save it in ostype + try: + import win32api + import win32security + import win32file + except: + raise ADEPTError('PyWin Extension (Win32API module) needed.\n'+\ + 'Download from http://sourceforge.net/projects/pywin32/files/ ') + try: + import winreg + except ImportError: + import _winreg as winreg + try: + v0 = win32api.GetVolumeInformation('C:\\') + v1 = win32api.GetSystemInfo()[6] + # fix for possible negative integer (Python problem) + volserial = v0[1] & 0xffffffff + lowcpu = v1 & 255 + 
highcpu = (v1 >> 8) & 255 + # changed to int + volserial = struct.pack(' 0 and mode == True: + m.update(key_string[:(13-len(uname))]) + md5sum = m.digest()[0:16] + # print md5sum.encode('hex') + # normal ident4id calculation + retval = [] + for sdata in data: + retval.append(ARC4.new(md5sum).decrypt(sdata)) + for rval in retval: + if rval[:4] == 'ec20': + return rval[4:] + return False + # start normal execution + # list for username variants + unamevars = [] + # fill username variants list + unamevars.append(self.user) + unamevars.append(self.user + chr(0)) + unamevars.append(self.user.lower()) + unamevars.append(self.user.lower() + chr(0)) + unamevars.append(self.user.upper()) + unamevars.append(self.user.upper() + chr(0)) + # go through it + for uname in unamevars: + result = genkeysub(uname, True) + if result != False: + return result + result = genkeysub(uname) + if result != False: + return result + # didn't find it, return false + return False +## raise ADEPTError('Unsupported Ident4D Decryption,\n'+\ +## 'report the bug to the ineptpdf script forum') + + KEYWORD_OBJ = KWD(b'obj') + + def getobj(self, objid): + if not self.ready: + raise PDFException('PDFDocument not initialized') + #assert self.xrefs + if objid in self.objs: + genno = 0 + obj = self.objs[objid] + else: + for xref in self.xrefs: + try: + (stmid, index) = xref.getpos(objid) + break + except KeyError: + pass + else: + #if STRICT: + # raise PDFSyntaxError('Cannot locate objid=%r' % objid) + return None + if stmid: + if gen_xref_stm: + return PDFObjStmRef(objid, stmid, index) + # Stuff from pdfminer: extract objects from object stream + stream = stream_value(self.getobj(stmid)) + if stream.dic.get('Type') is not LITERAL_OBJSTM: + if STRICT: + raise PDFSyntaxError('Not a stream object: %r' % stream) + try: + n = stream.dic['N'] + except KeyError: + if STRICT: + raise PDFSyntaxError('N is not defined: %r' % stream) + n = 0 + + if stmid in self.parsed_objs: + objs = self.parsed_objs[stmid] + else: 
# helper class for cookie retrieval
class WinBrowserCookie():
    """Read cookie values from the Firefox cookie database on Windows.

    The profile used is the first ``Path=`` entry found in
    ``%AppData%\\Mozilla\\Firefox\\profiles.ini``.
    """

    def __init__(self):
        pass

    def getcookie(self, cname, chost):
        """Return the value of cookie ``cname`` stored for host ``chost``.

        The host is tried as given and then with a leading dot, because
        Firefox stores some hosts in that form.

        Returns:
            The cookie value, or None when Firefox, its profile, its cookie
            database or the cookie itself cannot be found.

        Raises:
            ADEPTError: if the cookie database cannot be opened or queried
                (typically because a running Firefox has it locked).
        """
        appdata = os.environ.get('AppData')
        if not appdata:
            # not a Windows environment, nothing to look up
            return None
        firefox_dir = os.path.join(appdata, 'Mozilla', 'Firefox')
        try:
            with open(os.path.join(firefox_dir, 'profiles.ini'), 'r') as ffini:
                firefoxini = ffini.read()
        except (IOError, OSError, UnicodeError):
            # Firefox not installed or on an USB stick
            return None

        cookie_db = None
        for line in firefoxini.splitlines():
            key, sep, value = line.partition('=')
            if sep and key.strip() == 'Path':
                # os.path.join also copes with an absolute profile path
                cookie_db = os.path.normpath(
                    os.path.join(firefox_dir, value.strip(), 'cookies.sqlite'))
                break
        if cookie_db is None or not os.path.isfile(cookie_db):
            return None

        try:
            con = sqlite3.connect(cookie_db, timeout=1)
        except sqlite3.Error:
            raise ADEPTError('Firefox Cookie data base locked. Close Firefox and try again')
        try:
            cur = con.cursor()
            # sometimes is a dot in front of the host
            for host in (chost, '.' + chost):
                cur.execute("select value from moz_cookies where name=? and host=?",
                            (cname, host))
                row = cur.fetchone()
                if row is not None:
                    return row[0]
            return None
        except sqlite3.Error:
            raise ADEPTError('Firefox Cookie database is locked. Close Firefox and try again')
        finally:
            # the original never released the connection
            con.close()
def find_xref(self):
    """Return the offset stored after the last ``startxref`` keyword.

    The file is scanned backwards line by line; the last non-empty line
    seen before reaching ``startxref`` is the offset of the
    cross-reference section.

    Raises:
        PDFNoValidXRef: if no ``startxref`` line exists, or if the line
            following it is missing or is not an integer.  Reporting the
            malformed case with this exception (instead of letting a
            ValueError/TypeError escape) lets read_xref() fall back to
            scanning the file for objects.
    """
    # search the last xref table by scanning the file backwards.
    prev = None
    for line in self.revreadlines():
        line = line.strip()
        if line == b'startxref':
            break
        if line:
            prev = line
    else:
        raise PDFNoValidXRef('Unexpected EOF')
    try:
        return int(prev)
    except (TypeError, ValueError):
        raise PDFNoValidXRef('Invalid startxref offset: %r' % (prev,))
def dump(self, outf):
    """Write the decrypted document to the binary file object ``outf``.

    Every object id collected in ``self.objids`` is fetched through
    ``self.doc.getobj`` and serialized as an indirect object with
    generation number 0.  Afterwards either a classic ``xref`` table plus
    trailer is written, or - when the module-level ``gen_xref_stm`` flag is
    set - a cross-reference stream that also indexes objects left inside
    object streams (``PDFObjStmRef`` entries).
    """
    self.outf = outf
    self.write(self.version)
    self.write(b'\n%\xe2\xe3\xcf\xd3\n')
    doc = self.doc
    objids = self.objids
    xrefs = {}
    maxobj = max(objids)
    trailer = dict(self.trailer)
    trailer['Size'] = maxobj + 1
    for objid in objids:
        obj = doc.getobj(objid)
        if isinstance(obj, PDFObjStmRef):
            # stays inside its object stream; only referenced from the xref
            xrefs[objid] = obj
            continue
        if obj is not None:
            try:
                genno = obj.genno
            except AttributeError:
                genno = 0
            xrefs[objid] = (self.tell(), genno)
            self.serialize_indirect(objid, obj)
    startxref = self.tell()

    if not gen_xref_stm:
        self.write(b'xref\n')
        self.write(b'0 %d\n' % (maxobj + 1,))
        for objid in range(0, maxobj + 1):
            if objid in xrefs:
                # force the genno to be 0
                self.write(b"%010d 00000 n \n" % xrefs[objid][0])
            else:
                self.write(b"%010d %05d f \n" % (0, 65535))

        self.write(b'trailer\n')
        self.serialize_object(trailer)
        self.write(b'\nstartxref\n%d\n%%%%EOF' % startxref)

    else:  # Generate crossref stream.

        # Calculate size of entries
        maxoffset = max(startxref, maxobj)
        maxindex = PDFObjStmRef.maxindex
        fl2 = 2
        power = 65536
        while maxoffset >= power:
            fl2 += 1
            power *= 256
        fl3 = 1
        power = 256
        while maxindex >= power:
            fl3 += 1
            power *= 256

        index = []
        first = None
        prev = None
        data = []
        # Put the xrefstream's reference in itself
        startxref = self.tell()
        maxobj += 1
        xrefs[maxobj] = (startxref, 0)
        for objid in sorted(xrefs):
            # 'index' collects (first id, count) pairs of consecutive ids
            if first is None:
                first = objid
            elif objid != prev + 1:
                index.extend((first, prev - first + 1))
                first = objid
            prev = objid
            objref = xrefs[objid]
            if isinstance(objref, PDFObjStmRef):
                f1 = 2
                f2 = objref.stmid
                f3 = objref.index
            else:
                f1 = 1
                f2 = objref[0]
                # we force all generation numbers to be 0
                # f3 = objref[1]
                f3 = 0

            data.append(struct.pack('>B', f1))
            data.append(struct.pack('>L', f2)[-fl2:])
            data.append(struct.pack('>L', f3)[-fl3:])
        index.extend((first, prev - first + 1))
        # struct.pack returns bytes, so the pieces must be joined with a
        # bytes separator; ''.join() raises TypeError on Python 3.
        data = zlib.compress(b''.join(data))
        dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
               'W': [1, fl2, fl3], 'Length': len(data),
               'Filter': LITERALS_FLATE_DECODE[0],
               'Root': trailer['Root'],}
        if 'Info' in trailer:
            dic['Info'] = trailer['Info']
        xrefstm = PDFStream(dic, data)
        self.serialize_indirect(maxobj, xrefstm)
        self.write(b'startxref\n%d\n%%%%EOF' % startxref)
def escape_string(self, string):
    """Escape ``string`` (bytes or bytearray) for a PDF literal string.

    Backslashes, parentheses, line feeds and carriage returns are replaced
    by their backslash escapes.  The carriage return has to be escaped as
    well: a PDF reader treats an unescaped end-of-line marker inside a
    literal string as a single line feed, which would silently change
    binary string data.

    Returns:
        The escaped data, of the same type as the input.
    """
    replacements = (
        (b'\\', b'\\\\'),  # must be first, the other escapes add backslashes
        (b'\n', b'\\n'),
        (b'\r', b'\\r'),
        (b'(', b'\\('),
        (b')', b'\\)'),
    )
    for raw, escaped in replacements:
        string = string.replace(raw, escaped)
    return string
def cli_main(argv=sys.argv):
    """Command line entry point: decrypt INBOOK into OUTBOOK.

    Args:
        argv: ``[program, KEYFILE, INBOOK, OUTBOOK]``.

    Returns:
        0 on success, 1 when PyCrypto is missing or the argument count is
        wrong.  Errors raised while parsing or writing the PDF propagate
        to the caller.
    """
    progname = os.path.basename(argv[0])
    if RSA is None:
        # print() with a single argument is valid on Python 2 and 3; the
        # former print statements were a SyntaxError on Python 3.
        print("%s: This script requires PyCrypto, which must be installed "
              "separately. Read the top-of-script comment for details." %
              (progname,))
        return 1
    if len(argv) != 4:
        print("usage: %s KEYFILE INBOOK OUTBOOK" % (progname,))
        return 1
    keypath, inpath, outpath = argv[1:]
    with open(inpath, 'rb') as inf:
        serializer = PDFSerializer(inf, keypath)
        # hope this will fix the 'bad file descriptor' problem
        with open(outpath, 'wb') as outf:
            # help construct to make sure the method runs to the end
            serializer.dump(outf)
    return 0
botton = Tkinter.Button( + buttons, text="Decrypt", width=10, command=self.decrypt) + botton.pack(side=Tkconstants.LEFT) + Tkinter.Frame(buttons, width=10).pack(side=Tkconstants.LEFT) + button = Tkinter.Button( + buttons, text="Quit", width=10, command=self.quit) + button.pack(side=Tkconstants.RIGHT) + + + def get_keypath(self): + keypath = tkFileDialog.askopenfilename( + parent=None, title='Select ADEPT key file', + defaultextension='.der', filetypes=[('DER-encoded files', '.der'), + ('All Files', '.*')]) + if keypath: + keypath = os.path.normpath(os.path.realpath(keypath)) + self.keypath.delete(0, Tkconstants.END) + self.keypath.insert(0, keypath) + return + + def get_inpath(self): + inpath = tkFileDialog.askopenfilename( + parent=None, title='Select ADEPT or FileOpen-encrypted PDF file to decrypt', + defaultextension='.pdf', filetypes=[('PDF files', '.pdf'), + ('All files', '.*')]) + if inpath: + inpath = os.path.normpath(os.path.realpath(inpath)) + self.inpath.delete(0, Tkconstants.END) + self.inpath.insert(0, inpath) + return + + def debug_toggle(self): + global DEBUG_MODE + if DEBUG_MODE == False: + DEBUG_MODE = True + else: + DEBUG_MODE = False + + def get_outpath(self): + outpath = tkFileDialog.asksaveasfilename( + parent=None, title='Select unencrypted PDF file to produce', + defaultextension='.pdf', filetypes=[('PDF files', '.pdf'), + ('All files', '.*')]) + if outpath: + outpath = os.path.normpath(os.path.realpath(outpath)) + self.outpath.delete(0, Tkconstants.END) + self.outpath.insert(0, outpath) + return + + def decrypt(self): + global INPUTFILEPATH + global KEYFILEPATH + global PASSWORD + keypath = self.keypath.get() + inpath = self.inpath.get() + outpath = self.outpath.get() + if not keypath or not os.path.exists(keypath): + # keyfile doesn't exist + KEYFILEPATH = False + PASSWORD = keypath + if not inpath or not os.path.exists(inpath): + self.status['text'] = 'Specified input file does not exist' + return + if not outpath: + self.status['text'] = 
'Output file not specified' + return + if inpath == outpath: + self.status['text'] = 'Must have different input and output files' + return + # patch for non-ascii characters + INPUTFILEPATH = inpath.encode('utf-8') + argv = [sys.argv[0], keypath, inpath, outpath] + self.status['text'] = 'Processing ...' + try: + cli_main(argv) + except Exception, a: + self.status['text'] = 'Error: ' + str(a) + return + self.status['text'] = 'File successfully decrypted.\n'+\ + 'Close this window or decrypt another pdf file.' + return + +def gui_main(): + root = Tkinter.Tk() + if RSA is None: + root.withdraw() + tkMessageBox.showerror( + "PDF FileOpen Decrypter", + "This script requires PyCrypto, which must be installed " + "separately. Read the top-of-script comment for details.") + return 1 + root.title('FileOpen PDF Decrypter 8.5.0') + root.resizable(True, False) + root.minsize(370, 0) + DecryptionDialog(root).pack(fill=Tkconstants.X, expand=1) + root.mainloop() + return 0 + + +if __name__ == '__main__': + if len(sys.argv) > 1: + sys.exit(cli_main()) + sys.exit(gui_main()) -- cgit v1.2.3