From 737d5e7f1e7e0763d9b5e42e3d0effb9c1dfc0ea Mon Sep 17 00:00:00 2001 From: NoDRM Date: Sun, 3 Dec 2023 10:45:09 +0100 Subject: Bunch of updates for the FileOpen script --- .../ineptpdf_fileopen.pyw | 3067 ++++++++++++++++++++ 1 file changed, 3067 insertions(+) create mode 100644 Other_Tools/Tetrachroma_FileOpen_ineptpdf/ineptpdf_fileopen.pyw (limited to 'Other_Tools/Tetrachroma_FileOpen_ineptpdf/ineptpdf_fileopen.pyw') diff --git a/Other_Tools/Tetrachroma_FileOpen_ineptpdf/ineptpdf_fileopen.pyw b/Other_Tools/Tetrachroma_FileOpen_ineptpdf/ineptpdf_fileopen.pyw new file mode 100644 index 0000000..5e647e1 --- /dev/null +++ b/Other_Tools/Tetrachroma_FileOpen_ineptpdf/ineptpdf_fileopen.pyw @@ -0,0 +1,3067 @@ +#! /usr/bin/python + +# ineptpdf + +# To run this program install Python 2.7 from http://www.python.org/download/ +# +# PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto +# +# and PyWin Extension (Win32API module) from +# http://sourceforge.net/projects/pywin32/files/ +# +# Make sure to install the dedicated versions for Python 2.7. +# +# It's recommended to use the 32-Bit Python Windows versions (even with a 64-bit +# Windows system). +# +# Save this script file as +# ineptpdf8.4.51.pyw and double-click on it to run it. + +# Revision history: +# 1 - Initial release +# 2 - Improved determination of key-generation algorithm +# 3 - Correctly handle PDF >=1.5 cross-reference streams +# 4 - Removal of ciando's personal ID (anon) +# 5 - removing small bug with V3 ebooks (anon) +# 6 - changed to adeptkey4.der format for 1.7.2 support (anon) +# 6.1 - backward compatibility for 1.7.1 and old adeptkey.der (anon) +# 7 - Get cross reference streams and object streams working for input. +# Not yet supported on output but this only effects file size, +# not functionality. 
(anon2) +# 7.1 - Correct a problem when an old trailer is not followed by startxref (anon2) +# 7.2 - Correct malformed Mac OS resource forks for Stanza +# - Support for cross ref streams on output (decreases file size) (anon2) +# 7.3 - Correct bug in trailer with cross ref stream that caused the error (anon2) +# "The root object is missing or invalid" in Adobe Reader. +# 7.4 - Force all generation numbers in output file to be 0, like in v6. +# Fallback code for wrong xref improved (search till last trailer +# instead of first) (anon2) +# 8 - fileopen user machine identifier support (Tetrachroma) +# 8.1 - fileopen user cookies support (Tetrachroma) +# 8.2 - fileopen user name/password support (Tetrachroma) +# 8.3 - fileopen session cookie support (Tetrachroma) +# 8.3.1 - fix for the "specified key file does not exist" error (Tetrachroma) +# 8.3.2 - improved server result parsing (Tetrachroma) +# 8.4 - Ident4D and encrypted Uuid support (Tetrachroma) +# 8.4.1 - improved MAC address processing (Tetrachroma) +# 8.4.2 - FowP3Uuid fallback file processing (Tetrachroma) +# 8.4.3 - improved user/password pdf file detection (Tetrachroma) +# 8.4.4 - small bugfix (Tetrachroma) +# 8.4.5 - improved cookie host searching (Tetrachroma) +# 8.4.6 - STRICT parsing disabled (non-standard pdf processing) (Tetrachroma) +# 8.4.7 - UTF-8 input file conversion (Tetrachroma) +# 8.4.8 - fix for more rare utf8 problems (Tetrachroma) +# 8.4.9 - solution for utf8 in comination with +# ident4id method (Tetrachroma) +# 8.4.10 - line feed processing, non c system drive patch, nrbook support (Tetrachroma) +# 8.4.11 - alternative ident4id calculation (Tetrachroma) +# 8.4.12 - fix for capital username characters and +# other unusual user login names (Tetrachroma & ZeroPoint) +# 8.4.13 - small bug fixes (Tetrachroma) +# 8.4.14 - fix for non-standard-conform fileopen pdfs (Tetrachroma) +# 8.4.15 - 'bad file descriptor'-fix (Tetrachroma) +# 8.4.16 - improves user/pass detection (Tetrachroma) +# 8.4.17 
- fix for several '=' chars in a DPRM entity (Tetrachroma) +# 8.4.18 - follow up bug fix for the DPRM problem, +# more readable error messages (Tetrachroma) +# 8.4.19 - 2nd fix for 'bad file descriptor' problem (Tetrachroma) +# 8.4.20 - follow up patch (Tetrachroma) +# 8.4.21 - 3rd patch for 'bad file descriptor' (Tetrachroma) +# 8.4.22 - disable prints for exception prevention (Tetrachroma) +# 8.4.23 - check for additional security attributes (Tetrachroma) +# 8.4.24 - improved cookie session support (Tetrachroma) +# 8.4.25 - more compatibility with unicode files (Tetrachroma) +# 8.4.26 - automated session/user cookie request function (works +# only with Firefox 3.x+) (Tetrachroma) +# 8.4.27 - user/password fallback +# 8.4.28 - AES decryption, improved misconfigured pdf handling, +# limited experimental APS support (Tetrachroma & Neisklar) +# 8.4.29 - backport for bad formatted rc4 encrypted pdfs (Tetrachroma) +# 8.4.30 - extended authorization attributes support (Tetrachroma) +# 8.4.31 - improved session cookie and better server response error +# handling (Tetrachroma) +# 8.4.33 - small cookie optimizations (Tetrachroma) +# 8.4.33 - debug output option (Tetrachroma) +# 8.4.34 - better user/password management +# handles the 'AskUnp' response) (Tetrachroma) +# 8.4.35 - special handling for non-standard systems (Tetrachroma) +# 8.4.36 - previous machine/disk handling [PrevMach/PrevDisk] (Tetrachroma) +# 8.4.36 - FOPN_flock support (Tetrachroma) +# 8.4.37 - patch for unicode paths/filenames (Tetrachroma) +# 8.4.38 - small fix for user/password dialog (Tetrachroma) +# 8.4.39 - sophisticated request mode differentiation, forced +# uuid calculation (Tetrachroma) +# 8.4.40 - fix for non standard server responses (Tetrachroma) +# 8.4.41 - improved user/password request windows, +# better server response tolerance (Tetrachroma) +# 8.4.42 - improved nl/cr server response parsing (Tetrachroma) +# 8.4.43 - fix for user names longer than 13 characters and special +# uuid 
encryption (Tetrachroma) +# 8.4.44 - another fix for ident4d problem (Tetrachroma) +# 8.4.45 - 2nd fix for ident4d problem (Tetrachroma) +# 8.4.46 - script cleanup and optimizations (Tetrachroma) +# 8.4.47 - script identification change to Adobe Reader (Tetrachroma) +# 8.4.48 - improved tolerance for false file/registry entries (Tetrachroma) +# 8.4.49 - improved username encryption (Tetrachroma) +# 8.4.50 - improved (experimental) APS support (Tetrachroma & Neisklar) +# 8.4.51 - automatic APS offline key retrieval (works only for +# Onleihe right now) (80ka80 & Tetrachroma) + +# 8.5.0 - First update by noDRM - trying to update the script to include +# improvements from ineptpdf. + +""" +Decrypts Adobe ADEPT-encrypted and Fileopen PDF files. +""" + +from __future__ import with_statement + +__license__ = 'GPL v3' + +import sys +import os +import re +import zlib +import struct +import hashlib +from itertools import chain, islice +import xml.etree.ElementTree as etree +import Tkinter +import Tkconstants +import tkFileDialog +import tkMessageBox +# added for fileopen support +import urllib +import urlparse +import time +import socket +import string +import uuid +import subprocess +import time +import getpass +from ctypes import * +import traceback +import inspect +import tempfile +import sqlite3 +import httplib + +from decimal import Decimal +import itertools + +try: + from Crypto.Cipher import ARC4 + # needed for newer pdfs + from Crypto.Cipher import AES + from Crypto.Hash import SHA256 + from Crypto.PublicKey import RSA + +except ImportError: + ARC4 = None + RSA = None + +from io import BytesIO + +class ADEPTError(Exception): + pass + +# global variable (needed for fileopen and password decryption) +INPUTFILEPATH = '' +KEYFILEPATH = '' +PASSWORD = '' +DEBUG_MODE = False +IVERSION = '8.4.51' + +# Do we generate cross reference streams on output? 
def bytesToNumber(bytes):
    '''
    Interpret a byte sequence as an unsigned big-endian integer.

    `bytes` may be a bytes or bytearray object, a Python 2 str, or any
    iterable of integers in range(256).  An empty sequence yields 0.
    (The parameter name shadows the builtin; it is kept so that existing
    keyword callers continue to work.)
    '''
    # A plain 0 replaces the former "0L" literal: Python 2 promotes to
    # long automatically and "0L" is a syntax error on Python 3.
    total = 0
    # bytearray() normalises every accepted input to integers, so the
    # arithmetic below no longer fails on a Python 2 str.
    for byte in bytearray(bytes):
        total = (total << 8) + byte
    return total
def nunpack(s, default=0):
    '''
    Unpack a big-endian unsigned integer from a byte string.

    Returns `default` when `s` is empty.  Any width is accepted: the
    previous implementation handled 1-4 bytes and, for longer input,
    *returned* a TypeError instance instead of a number, which callers
    would then have used as an offset.
    '''
    if not s:
        return default
    value = 0
    # bytearray() yields integers on both Python 2 and Python 3.
    for byte in bytearray(s):
        value = (value << 8) | byte
    return value
class PSSymbolTable(object):

    '''
    Interning table for PostScript symbols.

    Each distinct name is mapped to exactly one instance of `classe`
    (PSLiteral or PSKeyword in this file), so symbols can be compared
    by identity.
    '''

    def __init__(self, classe):
        self.classe = classe
        self.dic = {}

    def intern(self, name):
        '''Return the symbol registered for `name`, creating it on first use.'''
        try:
            return self.dic[name]
        except KeyError:
            pass
        symbol = self.classe(name)
        self.dic[name] = symbol
        return symbol
def __repr__(self):
    '''
    Return a short debugging description of the parser.

    The format string had been reduced to an empty string, so applying
    it to two arguments raised "TypeError: not all arguments converted".
    That matters because error messages elsewhere in this file format
    the parser with %r.  The text restored here follows upstream
    pdfminer.
    '''
    return '<PSBaseParser: %r, bufpos=%d>' % (self.fp, self.bufpos)
def parse_keyword(self, s, i):
    '''
    Tokenizer state: accumulate a bare keyword starting at s[i].

    Returns the (next_state, next_index) pair used by nexttoken().
    The words true/false become Python booleans; anything else is
    interned as a PSKeyword.
    '''
    m = END_KEYWORD.search(s, i)
    if not m:
        # keyword continues into the next buffer
        self.token += s[i:]
        return (self.parse_keyword, len(s))
    j = m.start(0)
    self.token += s[i:j]
    # self.token is a byte string; comparing it with the text literals
    # 'true'/'false' never matched on Python 3, so booleans were
    # emitted as keywords.
    if self.token == b'true':
        token = True
    elif self.token == b'false':
        token = False
    else:
        token = KWD(self.token)
    self.add_token(token)
    return (self.parse_main, j)

def parse_string(self, s, i):
    '''
    Tokenizer state: inside a literal string "( ... )".

    Tracks nested parentheses in self.paren and hands backslash
    escapes over to parse_string_1.
    '''
    m = END_STRING.search(s, i)
    if not m:
        self.token += s[i:]
        return (self.parse_string, len(s))
    j = m.start(0)
    self.token += s[i:j]
    # one-byte slice: a byte string on Python 2 and Python 3 alike
    c = s[j:j+1]
    if c == b'\\':
        # must be a byte string: parse_string_1 appends bytes to it
        # (it used to be '', which raised TypeError on Python 3)
        self.oct = b''
        return (self.parse_string_1, j+1)
    if c == b'(':
        self.paren += 1
        self.token += c
        return (self.parse_string, j+1)
    if c == b')':
        self.paren -= 1
        if self.paren:
            # closing paren of a nested pair is part of the string
            self.token += c
            return (self.parse_string, j+1)
    self.add_token(self.token)
    return (self.parse_main, j+1)

def parse_string_1(self, s, i):
    '''
    Tokenizer state: directly after a backslash inside a literal string.

    Collects up to three octal digits, or translates a single-character
    escape through ESC_STRING.  Unknown escapes are dropped.
    '''
    c = s[i:i+1]
    if OCT_STRING.match(c) and len(self.oct) < 3:
        self.oct += c
        return (self.parse_string_1, i+1)
    if self.oct:
        # Keep only the low byte: three octal digits can encode up to
        # 0o777, which would otherwise raise ValueError below.
        code = int(self.oct, 8) & 0xFF
        if sys.version_info[0] == 2:
            self.token += chr(code)
        else:
            self.token += bytes([code])
        # re-scan the current character as ordinary string content
        return (self.parse_string, i)
    if c in ESC_STRING:
        if sys.version_info[0] == 2:
            self.token += chr(ESC_STRING[c])
        else:
            self.token += bytes([ESC_STRING[c]])
    return (self.parse_string, i+1)
def nextline(self):
    '''
    Read the next line and return it as a (position, data) pair.

    A line is terminated by \\r, \\n or \\r\\n; the terminator is kept
    in the returned data.  PSEOF raised by fillbuf() propagates.
    '''
    start = self.bufpos + self.charpos
    collected = b''
    pending_cr = False
    while True:
        self.fillbuf()
        if pending_cr:
            # The line ended on '\r'; take a directly following '\n'
            # as part of the same terminator, then stop either way.
            # One-byte slice: a byte string on Python 2 and 3 alike.
            following = self.buf[self.charpos:self.charpos + 1]
            if following == b'\n':
                collected += following
                self.charpos += 1
            break
        found = EOL.search(self.buf, self.charpos)
        if not found:
            # no terminator in this buffer: take all of it and refill
            collected += self.buf[self.charpos:]
            self.charpos = len(self.buf)
            continue
        stop = found.end(0)
        collected += self.buf[self.charpos:stop]
        self.charpos = stop
        if not collected.endswith(b'\r'):
            break
        pending_cr = True
    return (start, collected)
def nextobject(self, direct=False):
    '''
    Consume tokens until one complete object is available and return it.

    Plain values (numbers, booleans, strings, literals) are pushed
    as-is; "[ ... ]" is collected into a list and "<< ... >>" into a
    dict keyed by literal name.  With direct=True the object is taken
    from the current stack as soon as no container is open; otherwise
    it is taken from self.results, which do_keyword() implementations
    are expected to fill.
    '''
    plain_types = (int, Decimal, bool, bytearray, bytes, str, PSLiteral)
    while not self.results:
        (pos, token) = self.nexttoken()
        if isinstance(token, plain_types):
            # normal token
            self.push((pos, token))
        elif token == KEYWORD_ARRAY_BEGIN:
            self.start_type(pos, 'a')
        elif token == KEYWORD_ARRAY_END:
            try:
                self.push(self.end_type('a'))
            except PSTypeError:
                if STRICT:
                    raise
        elif token == KEYWORD_DICT_BEGIN:
            self.start_type(pos, 'd')
        elif token == KEYWORD_DICT_END:
            try:
                (pos, objs) = self.end_type('d')
                if len(objs) % 2 != 0:
                    print("Incomplete dictionary construct")
                    # pad so the last key still gets a (dummy) value
                    objs.append("")
                entries = {literal_name(k): v
                           for (k, v) in choplist(2, objs)}
                self.push((pos, entries))
            except PSTypeError:
                if STRICT:
                    raise
        else:
            self.do_keyword(pos, token)
        if self.context:
            # still inside an array or dictionary
            continue
        if direct:
            return self.pop(1)[0]
        self.flush()
    return self.results.pop(0)
def resolve_all(x):
    '''
    Recursively resolve X and everything nested inside it.

    Afterwards no PDFObjRef remains in the returned structure.  Lists
    are rebuilt; dictionaries are updated in place.  This may be slow
    for large object graphs.
    '''
    while isinstance(x, PDFObjRef):
        x = x.resolve()
    if isinstance(x, list):
        x = [resolve_all(v) for v in x]
    elif isinstance(x, dict):
        # dict.iteritems() exists only on Python 2; items() works on
        # both.  Iterate over a snapshot because values are replaced
        # while looping.
        for (k, v) in list(x.items()):
            x[k] = resolve_all(v)
    return x
def ascii85decode(data):
    '''
    Decode ASCII85 (base-85) encoded data and return the raw bytes.

    Characters outside '!'..'u', 'z' and '~' (e.g. whitespace) are
    skipped.  'z' expands to four zero bytes.  Decoding stops at the
    first '~' (start of the "~>" end marker); if the marker is missing,
    the end of `data` is treated the same way so a trailing partial
    group is not lost.
    '''
    n = b = 0
    out = b''
    # Iterate over integer code points.  The previous loop compared and
    # subtracted on the raw elements, which raised TypeError on both
    # Python 2 (str - int) and Python 3 (bytes vs. int comparison).
    for c in bytearray(data):
        if 33 <= c <= 117:              # '!' .. 'u'
            n += 1
            b = b * 85 + (c - 33)
            if n == 5:
                out += struct.pack('>L', b)
                n = b = 0
        elif c == 122:                  # 'z'
            assert n == 0
            out += b'\0\0\0\0'
        elif c == 126:                  # '~'
            break
    if n:
        # Partial final group: pad with 'u' (value 84) and keep only
        # the n-1 bytes that carry data.
        for _ in range(5 - n):
            b = b * 85 + 84
        out += struct.pack('>L', b)[:n - 1]
    return out
def get_decdata(self):
    '''
    Return the stream payload after decryption but before any filter
    decoding.

    A copy cached in self.decdata is preferred.  Otherwise the raw data
    is run through self.decipher when both a decipher and non-empty raw
    data are present, and returned unchanged when they are not.
    '''
    cached = self.decdata
    if cached is not None:
        return cached
    raw = self.rawdata
    if not (self.decipher and raw):
        return raw
    # Handle encryption
    return self.decipher(self.objid, self.genno, raw)
class PDFXRef(object):

    '''
    A classic (plain text) cross-reference table.

    After load(), self.offsets maps object id -> (generation, file
    position) for every in-use entry and self.trailer holds the trailer
    dictionary that follows the table.
    '''

    def __init__(self):
        self.offsets = None
        return

    def __repr__(self):
        # The format string had been reduced to '' which made
        # "'' % n" raise TypeError; text restored from upstream
        # pdfminer.  "or ()" keeps repr() usable before load().
        return '<PDFXRef: objs=%d>' % len(self.offsets or ())

    def objids(self):
        '''Iterate over the ids of all in-use objects.'''
        return iter(self.offsets.keys())

    def load(self, parser):
        '''
        Read subsections of the table from `parser` until the "trailer"
        line, then load the trailer dictionary.

        Raises PDFNoValidXRef on premature end of file or on any line
        that does not have the expected shape.
        '''
        self.offsets = {}
        while 1:
            try:
                (pos, line) = parser.nextline()
            except PSEOF:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
            if not line:
                raise PDFNoValidXRef('Premature eof: %r' % parser)
            if line.startswith(b'trailer'):
                # rewind so load_trailer() sees the keyword itself
                parser.seek(pos)
                break
            # subsection header: "<first object id> <entry count>"
            f = line.strip().split(b' ')
            if len(f) != 2:
                raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line))
            try:
                (start, nobjs) = map(int, f)
            except ValueError:
                raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line))
            for objid in range(start, start+nobjs):
                try:
                    (_, line) = parser.nextline()
                except PSEOF:
                    raise PDFNoValidXRef('Unexpected EOF - file corrupted?')
                # entry: "<offset> <generation> <n|f>"
                f = line.strip().split(b' ')
                if len(f) != 3:
                    raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line))
                (pos, genno, use) = f
                if use != b'n':
                    # only in-use entries are recorded
                    continue
                self.offsets[objid] = (int(genno.decode('utf-8')), int(pos.decode('utf-8')))
        self.load_trailer(parser)
        return

    KEYWORD_TRAILER = PSKeywordTable.intern(b'trailer')

    def load_trailer(self, parser):
        '''
        Parse the "trailer << ... >>" construct and store the
        dictionary in self.trailer.
        '''
        try:
            (_, kwd) = parser.nexttoken()
            # NOTE: an assert, so it is skipped under "python -O"
            assert kwd is self.KEYWORD_TRAILER
            (_, dic) = parser.nextobject(direct=True)
        except PSEOF:
            # The file ended right after the dictionary: use whatever
            # the parser already has on its stack.
            x = parser.pop(1)
            if not x:
                raise PDFNoValidXRef('Unexpected EOF - file corrupted')
            (_, dic) = x[0]
        self.trailer = dict_value(dic)
        return

    def getpos(self, objid):
        '''
        Return (None, file position) for `objid`.

        The leading None mirrors PDFXRefStream.getpos(), which returns
        a containing object-stream id in that slot.  Raises KeyError
        for unknown ids.
        '''
        (_genno, pos) = self.offsets[objid]
        return (None, pos)
None + self.entlen = None + self.fl1 = self.fl2 = self.fl3 = None + return + + def __repr__(self): + return '' % self.index + + def objids(self): + for first, size in self.index: + for objid in range(first, first + size): + yield objid + + def load(self, parser, debug=0): + (_,objid) = parser.nexttoken() # ignored + (_,genno) = parser.nexttoken() # ignored + (_,kwd) = parser.nexttoken() + (_,stream) = parser.nextobject() + if not isinstance(stream, PDFStream) or \ + stream.dic['Type'] is not LITERAL_XREF: + raise PDFNoValidXRef('Invalid PDF stream spec.') + size = stream.dic['Size'] + index = stream.dic.get('Index', (0,size)) + self.index = list(zip(itertools.islice(index, 0, None, 2), + itertools.islice(index, 1, None, 2))) + (self.fl1, self.fl2, self.fl3) = stream.dic['W'] + self.data = stream.get_data() + self.entlen = self.fl1+self.fl2+self.fl3 + self.trailer = stream.dic + return + + def getpos(self, objid): + offset = 0 + for first, size in self.index: + if first <= objid and objid < (first + size): + break + offset += size + else: + raise KeyError(objid) + i = self.entlen * ((objid - first) + offset) + ent = self.data[i:i+self.entlen] + f1 = nunpack(ent[:self.fl1], 1) + if f1 == 1: + pos = nunpack(ent[self.fl1:self.fl1+self.fl2]) + genno = nunpack(ent[self.fl1+self.fl2:]) + return (None, pos) + elif f1 == 2: + objid = nunpack(ent[self.fl1:self.fl1+self.fl2]) + index = nunpack(ent[self.fl1+self.fl2:]) + return (objid, index) + # this is a free object + raise KeyError(objid) + + +## PDFDocument +## +## A PDFDocument object represents a PDF document. +## Since a PDF file is usually pretty big, normally it is not loaded +## at once. Rather it is parsed dynamically as processing goes. +## A PDF parser is associated with the document. 
+## +class PDFDocument(object): + + def __init__(self): + self.xrefs = [] + self.objs = {} + self.parsed_objs = {} + self.root = None + self.catalog = None + self.parser = None + self.encryption = None + self.decipher = None + # dictionaries for fileopen + self.fileopen = {} + self.urlresult = {} + self.ready = False + return + + # set_parser(parser) + # Associates the document with an (already initialized) parser object. + def set_parser(self, parser): + if self.parser: + return + self.parser = parser + # The document is set to be temporarily ready during collecting + # all the basic information about the document, e.g. + # the header, the encryption information, and the access rights + # for the document. + self.ready = True + # Retrieve the information of each header that was appended + # (maybe multiple times) at the end of the document. + self.xrefs = parser.read_xref() + for xref in self.xrefs: + trailer = xref.trailer + if not trailer: continue + + # If there's an encryption info, remember it. + if 'Encrypt' in trailer: + #assert not self.encryption + try: + self.encryption = (list_value(trailer['ID']), + dict_value(trailer['Encrypt'])) + # fix for bad files + except: + self.encryption = (b'ffffffffffffffffffffffffffffffffffff', + dict_value(trailer['Encrypt'])) + if 'Root' in trailer: + self.set_root(dict_value(trailer['Root'])) + break + else: + raise PDFSyntaxError('No /Root object! - Is this really a PDF?') + # The document is set to be non-ready again, until all the + # proper initialization (asking the password key and + # verifying the access permission, so on) is finished. + self.ready = False + return + + # set_root(root) + # Set the Root dictionary of the document. + # Each PDF file must have exactly one /Root dictionary. 
+ def set_root(self, root): + self.root = root + self.catalog = dict_value(self.root) + if self.catalog.get('Type') is not LITERAL_CATALOG: + if STRICT: + raise PDFSyntaxError('Catalog not found!') + return + # initialize(password='') + # Perform the initialization with a given password. + # This step is mandatory even if there's no password associated + # with the document. + def initialize(self, password=''): + if not self.encryption: + self.is_printable = self.is_modifiable = self.is_extractable = True + self.ready = True + return + (docid, param) = self.encryption + type = literal_name(param['Filter']) + if type == 'Adobe.APS' or type == "Standard" or type == "EBX_HANDLER": + print("This script is just for FOPN encryption.") + print("For standard password PDFs or Adobe PDFs, use ineptpdy.py") + raise PDFEncryptionError("Not a FileOpen-encrypted file") + if type == 'FOPN_fLock': + # remove of unnecessairy password attribute + return self.initialize_fopn_flock(docid, param) + if type == 'FOPN_foweb': + # remove of unnecessairy password attribute + return self.initialize_fopn(docid, param) + raise PDFEncryptionError('Unknown filter: param=%r' % param) + + def initialize_and_return_filter(self): + if not self.encryption: + self.is_printable = self.is_modifiable = self.is_extractable = True + self.ready = True + return None + + (docid, param) = self.encryption + type = literal_name(param['Filter']) + return type + + + PASSWORD_PADDING = b'(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' 
\ + b'\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz' + + + + # fileopen support + def initialize_fopn_flock(self, docid, param): + raise ADEPTError('FOPN_fLock not supported, yet ...') + # debug mode processing + global DEBUG_MODE + global IVERSION + if DEBUG_MODE == True: + if os.access('.',os.W_OK) == True: + debugfile = open('ineptpdf-'+IVERSION+'-debug.txt','w') + else: + raise ADEPTError('Cannot write debug file, current directory is not writable') + self.is_printable = self.is_modifiable = self.is_extractable = True + # get parameters and add it to the fo dictionary + self.fileopen['V'] = int_value(param.get('V',2)) + # crypt base + (docid, param) = self.encryption + #rights = dict_value(param['Info']) + rights = param['Info'] + #print rights + if DEBUG_MODE == True: debugfile.write(rights + '\n\n') +## for pair in rights.split(';'): +## try: +## key, value = pair.split('=',1) +## self.fileopen[key] = value +## # fix for some misconfigured INFO variables +## except: +## pass +## kattr = { 'SVID': 'ServiceID', 'DUID': 'DocumentID', 'I3ID': 'Ident3ID', \ +## 'I4ID': 'Ident4ID', 'VERS': 'EncrVer', 'PRID': 'USR'} +## for keys in kattr: +## try: +## self.fileopen[kattr[keys]] = self.fileopen[keys] +## del self.fileopen[keys] +## except: +## continue + # differentiate OS types +## sysplatform = sys.platform +## # if ostype is Windows +## if sysplatform=='win32': +## self.osuseragent = 'Windows NT 6.0' +## self.get_macaddress = self.get_win_macaddress +## self.fo_sethwids = self.fo_win_sethwids +## self.BrowserCookie = WinBrowserCookie +## elif sysplatform=='linux2': +## adeptout = 'Linux is not supported, yet.\n' +## raise ADEPTError(adeptout) +## self.osuseragent = 'Linux i686' +## self.get_macaddress = self.get_linux_macaddress +## self.fo_sethwids = self.fo_linux_sethwids +## else: +## adeptout = '' +## adeptout = adeptout + 'Due to various privacy violations from Apple\n' +## adeptout = adeptout + 'Mac OS X support is disabled by default.' 
+## raise ADEPTError(adeptout) +## # add static arguments for http/https request +## self.fo_setattributes() +## # add hardware specific arguments for http/https request +## self.fo_sethwids() +## +## if 'Code' in self.urlresult: +## if self.fileopen['Length'] == len(self.urlresult['Code']): +## self.decrypt_key = self.urlresult['Code'] +## else: +## self.decrypt_key = self.urlresult['Code'].decode('hex') +## else: +## raise ADEPTError('Cannot find decryption key.') + self.decrypt_key = 'stuff' + self.genkey = self.genkey_v2 + self.decipher = self.decrypt_rc4 + self.ready = True + return + + def initialize_fopn(self, docid, param): + # debug mode processing + global DEBUG_MODE + global IVERSION + if DEBUG_MODE == True: + if os.access('.',os.W_OK) == True: + debugfile = open('ineptpdf-'+IVERSION+'-debug.txt','w') + else: + raise ADEPTError('Cannot write debug file, current directory is not writable') + self.is_printable = self.is_modifiable = self.is_extractable = True + # get parameters and add it to the fo dictionary + self.fileopen['Length'] = int_value(param.get('Length', 0)) / 8 + self.fileopen['VEID'] = str_value(param.get('VEID')) + self.fileopen['BUILD'] = str_value(param.get('BUILD')) + self.fileopen['SVID'] = str_value(param.get('SVID')) + self.fileopen['DUID'] = str_value(param.get('DUID')) + self.fileopen['V'] = int_value(param.get('V',2)) + # crypt base + rights = str_value(param.get('INFO')).decode('base64') + rights = self.genkey_fileopeninfo(rights) + if DEBUG_MODE == True: debugfile.write(rights + '\n\n') + for pair in rights.split(';'): + try: + key, value = pair.split('=',1) + self.fileopen[key] = value + # fix for some misconfigured INFO variables + except: + pass + kattr = { 'SVID': 'ServiceID', 'DUID': 'DocumentID', 'I3ID': 'Ident3ID', \ + 'I4ID': 'Ident4ID', 'VERS': 'EncrVer', 'PRID': 'USR'} + for keys in kattr: + # fishing some misconfigured slashs out of it + try: + self.fileopen[kattr[keys]] = urllib.quote(self.fileopen[keys],safe='') + del 
self.fileopen[keys] + except: + continue + # differentiate OS types + sysplatform = sys.platform + # if ostype is Windows + if sysplatform=='win32': + self.osuseragent = 'Windows NT 6.0' + self.get_macaddress = self.get_win_macaddress + self.fo_sethwids = self.fo_win_sethwids + self.BrowserCookie = WinBrowserCookie + elif sysplatform=='linux2': + adeptout = 'Linux is not supported, yet.\n' + raise ADEPTError(adeptout) + self.osuseragent = 'Linux i686' + self.get_macaddress = self.get_linux_macaddress + self.fo_sethwids = self.fo_linux_sethwids + else: + adeptout = '' + adeptout = adeptout + 'Mac OS X is not supported, yet.' + adeptout = adeptout + 'Read the blogs FAQs for more information' + raise ADEPTError(adeptout) + # add static arguments for http/https request + self.fo_setattributes() + # add hardware specific arguments for http/https request + self.fo_sethwids() + #if DEBUG_MODE == True: debugfile.write(self.fileopen) + if 'UURL' in self.fileopen: + buildurl = self.fileopen['UURL'] + else: + buildurl = self.fileopen['PURL'] + # fix for bad DPRM structure + if self.fileopen['DPRM'][0] != r'/': + self.fileopen['DPRM'] = r'/' + self.fileopen['DPRM'] + # genius fix for bad server urls (IMHO) + if '?' in self.fileopen['DPRM']: + buildurl = buildurl + self.fileopen['DPRM'] + '&' + else: + buildurl = buildurl + self.fileopen['DPRM'] + '?' 
+ + # debug customization + #self.fileopen['Machine'] = '' + #self.fileopen['Disk'] = '' + + + surl = ( 'Stamp', 'Mode', 'USR', 'ServiceID', 'DocumentID',\ + 'Ident3ID', 'Ident4ID','DocStrFmt', 'OSType', 'OSName', 'OSData', 'Language',\ + 'LngLCID', 'LngRFC1766', 'LngISO4Char', 'Build', 'ProdVer', 'EncrVer',\ + 'Machine', 'Disk', 'Uuid', 'PrevMach', 'PrevDisk',\ + 'FormHFT',\ + 'SelServer', 'AcroVersion', 'AcroProduct', 'AcroReader',\ + 'AcroCanEdit', 'AcroPrefIDib', 'InBrowser', 'CliAppName',\ + 'DocIsLocal', 'DocPathUrl', 'VolName', 'VolType', 'VolSN',\ + 'FSName', 'FowpKbd', 'OSBuild',\ + 'RequestSchema') + + #settings request and special modes + if 'EVER' in self.fileopen and float(self.fileopen['EVER']) < 3.8: + self.fileopen['Mode'] = 'ICx' + + origurl = buildurl + buildurl = buildurl + 'Request=Setting' + for keys in surl: + try: + buildurl = buildurl + '&' + keys + '=' + self.fileopen[keys] + except: + continue + if DEBUG_MODE == True: debugfile.write( 'settings url:\n') + if DEBUG_MODE == True: debugfile.write( buildurl+'\n\n') + # custom user agent identification? 
+ if 'AGEN' in self.fileopen: + useragent = self.fileopen['AGEN'] + urllib.URLopener.version = useragent + # attribute doesn't exist - take the default user agent + else: + urllib.URLopener.version = self.osuseragent + # try to open the url + try: + u = urllib.urlopen(buildurl) + u.geturl() + result = u.read() + except: + raise ADEPTError('No internet connection or a blocking firewall!') +## finally: +## u.close() + # getting rid of the line feed + if DEBUG_MODE == True: debugfile.write('Settings'+'\n') + if DEBUG_MODE == True: debugfile.write(result+'\n\n') + #get rid of unnecessary characters + result = result.rstrip('\n') + result = result.rstrip(chr(13)) + result = result.lstrip('\n') + result = result.lstrip(chr(13)) + self.surlresult = {} + for pair in result.split('&'): + try: + key, value = pair.split('=',1) + # fix for bad server response + if key not in self.surlresult: + self.surlresult[key] = value + except: + pass + if 'RequestSchema' in self.surlresult: + self.fileopen['RequestSchema'] = self.surlresult['RequestSchema'] + if 'ServerSessionData' in self.surlresult: + self.fileopen['ServerSessionData'] = self.surlresult['ServerSessionData'] + if 'SetScope' in self.surlresult: + self.fileopen['RequestSchema'] = self.surlresult['SetScope'] + #print self.surlresult + if 'RetVal' in self.surlresult and 'SEMO' not in self.fileopen and(('Reason' in self.surlresult and \ + self.surlresult['Reason'] == 'AskUnp') or ('SetTarget' in self.surlresult and\ + self.surlresult['SetTarget'] == 'UnpDlg')): + # get user and password dialog + try: + self.gen_pw_dialog(self.surlresult['UnpUiName'], self.surlresult['UnpUiPass'],\ + self.surlresult['UnpUiTitle'], self.surlresult['UnpUiOk'],\ + self.surlresult['UnpUiSunk'], self.surlresult['UnpUiComm']) + except: + self.gen_pw_dialog() + + # the fileopen check might not be always right because of strange server responses + if 'SEMO' in self.fileopen and (self.fileopen['SEMO'] == '1'\ + or self.fileopen['SEMO'] == '2') and 
('CSES' in self.fileopen and\ + self.fileopen['CSES'] != 'fileopen'): + # get the url name for the cookie(s) + if 'CURL' in self.fileopen: + self.surl = self.fileopen['CURL'] + if 'CSES' in self.fileopen: + self.cses = self.fileopen['CSES'] + elif 'PHOS' in self.fileopen: + self.surl = self.fileopen['PHOS'] + elif 'LHOS' in self.fileopen: + self.surl = self.fileopen['LHOS'] + else: + raise ADEPTError('unknown Cookie name.\n Check ineptpdf forum for further assistance') + self.pwfieldreq = 1 + # session cookie processing + if self.fileopen['SEMO'] == '1': + cookies = self.BrowserCookie() + #print self.cses + #print self.surl + csession = cookies.getcookie(self.cses,self.surl) + if csession != None: + self.fileopen['Session'] = csession + self.gui = False + # fallback + else: + self.pwtk = Tkinter.Tk() + self.pwtk.title('Ineptpdf8') + self.pwtk.minsize(150, 0) + infotxt1 = 'Get the session cookie key manually (Firefox step-by-step:\n'+\ + 'Start Firefox -> Tools -> Options -> Privacy -> Show Cookies\n'+\ + '-> Search for a cookie from ' + self.surl +' with the\n'+\ + 'name ' + self.cses +' and copy paste the content field in the\n'+\ + 'Session Content field. 
Remove possible spaces or new lines at the '+\ + 'end\n (cursor must be blinking right behind the last character)' + self.label0 = Tkinter.Label(self.pwtk, text=infotxt1) + self.label0.pack() + self.label1 = Tkinter.Label(self.pwtk, text="Session Content") + self.pwfieldreq = 0 + self.gui = True + # user cookie processing + elif self.fileopen['SEMO'] == '2': + cookies = self.BrowserCookie() + #print self.cses + #print self.surl + name = cookies.getcookie('name',self.surl) + passw = cookies.getcookie('pass',self.surl) + if name != None or passw != None: + self.fileopen['UserName'] = urllib.quote(name) + self.fileopen['UserPass'] = urllib.quote(passw) + self.gui = False + # fallback + else: + self.pwtk = Tkinter.Tk() + self.pwtk.title('Ineptpdf8') + self.pwtk.minsize(150, 0) + self.label1 = Tkinter.Label(self.pwtk, text="Username") + infotxt1 = 'Get the user cookie keys manually (Firefox step-by-step:\n'+\ + 'Start Firefox -> Tools -> Options -> Privacy -> Show Cookies\n'+\ + '-> Search for cookies from ' + self.surl +' with the\n'+\ + 'name name in the user field and copy paste the content field in the\n'+\ + 'username field. Do the same with the name pass in the password field).' 
+ self.label0 = Tkinter.Label(self.pwtk, text=infotxt1) + self.label0.pack() + self.pwfieldreq = 1 + self.gui = True +## else: +## self.pwtk = Tkinter.Tk() +## self.pwtk.title('Ineptpdf8') +## self.pwtk.minsize(150, 0) +## self.pwfieldreq = 0 +## self.label1 = Tkinter.Label(self.pwtk, text="Username") +## self.pwfieldreq = 1 +## self.gui = True + if self.gui == True: + self.un_entry = Tkinter.Entry(self.pwtk) + # cursor here + self.un_entry.focus() + self.label2 = Tkinter.Label(self.pwtk, text="Password") + self.pw_entry = Tkinter.Entry(self.pwtk, show="*") + self.button = Tkinter.Button(self.pwtk, text='Go for it!', command=self.fo_save_values) + # widget layout, stack vertical + self.label1.pack() + self.un_entry.pack() + # create a password label and field + if self.pwfieldreq == 1: + self.label2.pack() + self.pw_entry.pack() + self.button.pack() + self.pwtk.update() + # start the event loop + self.pwtk.mainloop() + + # original request + # drive through tupple for building the permission url + burl = ( 'Stamp', 'Mode', 'USR', 'ServiceID', 'DocumentID',\ + 'Ident3ID', 'Ident4ID','DocStrFmt', 'OSType', 'Language',\ + 'LngLCID', 'LngRFC1766', 'LngISO4Char', 'Build', 'ProdVer', 'EncrVer',\ + 'Machine', 'Disk', 'Uuid', 'PrevMach', 'PrevDisk', 'User', 'SaUser', 'SaSID',\ + # special security measures + 'HostIsDomain', 'PhysHostname', 'LogiHostname', 'SaRefDomain',\ + 'FormHFT', 'UserName', 'UserPass', 'Session', \ + 'SelServer', 'AcroVersion', 'AcroProduct', 'AcroReader',\ + 'AcroCanEdit', 'AcroPrefIDib', 'InBrowser', 'CliAppName',\ + 'DocIsLocal', 'DocPathUrl', 'VolName', 'VolType', 'VolSN',\ + 'FSName', 'ServerSessionData', 'FowpKbd', 'OSBuild', \ + 'DocumentSessionData', 'RequestSchema') + + buildurl = origurl + buildurl = buildurl + 'Request=DocPerm' + for keys in burl: + try: + buildurl = buildurl + '&' + keys + '=' + self.fileopen[keys] + except: + continue + if DEBUG_MODE == True: debugfile.write('1st url:'+'\n') + if DEBUG_MODE == True: 
debugfile.write(buildurl+'\n\n') + # custom user agent identification? + if 'AGEN' in self.fileopen: + useragent = self.fileopen['AGEN'] + urllib.URLopener.version = useragent + # attribute doesn't exist - take the default user agent + else: + urllib.URLopener.version = self.osuseragent + # try to open the url + try: + u = urllib.urlopen(buildurl) + u.geturl() + result = u.read() + except: + raise ADEPTError('No internet connection or a blocking firewall!') +## finally: +## u.close() + # getting rid of the line feed + if DEBUG_MODE == True: debugfile.write('1st preresult'+'\n') + if DEBUG_MODE == True: debugfile.write(result+'\n\n') + #get rid of unnecessary characters + result = result.rstrip('\n') + result = result.rstrip(chr(13)) + result = result.lstrip('\n') + result = result.lstrip(chr(13)) + self.urlresult = {} + for pair in result.split('&'): + try: + key, value = pair.split('=',1) + self.urlresult[key] = value + except: + pass +## if 'RequestSchema' in self.surlresult: +## self.fileopen['RequestSchema'] = self.urlresult['RequestSchema'] + #self.urlresult + #result[0:8] == 'RetVal=1') or (result[0:8] == 'RetVal=2'): + if ('RetVal' in self.urlresult and (self.urlresult['RetVal'] != '1' and \ + self.urlresult['RetVal'] != '2' and \ + self.urlresult['RetVal'] != 'Update' and \ + self.urlresult['RetVal'] != 'Answer')): + + if ('Reason' in self.urlresult and (self.urlresult['Reason'] == 'BadUserPwd'\ + or self.urlresult['Reason'] == 'AskUnp')) or ('SwitchTo' in self.urlresult\ + and (self.urlresult['SwitchTo'] == 'Dialog')): + if 'ServerSessionData' in self.urlresult: + self.fileopen['ServerSessionData'] = self.urlresult['ServerSessionData'] + if 'DocumentSessionData' in self.urlresult: + self.fileopen['DocumentSessionData'] = self.urlresult['DocumentSessionData'] + buildurl = origurl + buildurl = buildurl + 'Request=DocPerm' + self.gen_pw_dialog() + # password not found - fallback + for keys in burl: + try: + buildurl = buildurl + '&' + keys + '=' + 
self.fileopen[keys] + except: + continue + if DEBUG_MODE == True: debugfile.write( '2ndurl:') + if DEBUG_MODE == True: debugfile.write( buildurl+'\n\n') + # try to open the url + try: + u = urllib.urlopen(buildurl) + u.geturl() + result = u.read() + except: + raise ADEPTError('No internet connection or a blocking firewall!') + # getting rid of the line feed + if DEBUG_MODE == True: debugfile.write( '2nd preresult') + if DEBUG_MODE == True: debugfile.write( result+'\n\n') + #get rid of unnecessary characters + result = result.rstrip('\n') + result = result.rstrip(chr(13)) + result = result.lstrip('\n') + result = result.lstrip(chr(13)) + self.urlresult = {} + for pair in result.split('&'): + try: + key, value = pair.split('=',1) + self.urlresult[key] = value + except: + pass + # did it work? + if ('RetVal' in self.urlresult and (self.urlresult['RetVal'] != '1' and \ + self.urlresult['RetVal'] != '2' and + self.urlresult['RetVal'] != 'Update' and \ + self.urlresult['RetVal'] != 'Answer')): + raise ADEPTError('Decryption was not successfull.\nReason: ' + self.urlresult['Error']) + # fix for non-standard-conform fileopen pdfs +## if self.fileopen['Length'] != 5 and self.fileopen['Length'] != 16: +## if self.fileopen['V'] == 1: +## self.fileopen['Length'] = 5 +## else: +## self.fileopen['Length'] = 16 + # patch for malformed pdfs + #print len(self.urlresult['Code']) + #print self.urlresult['Code'].encode('hex') + if 'code' in self.urlresult: + self.urlresult['Code'] = self.urlresult['code'] + if 'Code' in self.urlresult: + if len(self.urlresult['Code']) == 5 or len(self.urlresult['Code']) == 16: + self.decrypt_key = self.urlresult['Code'] + else: + self.decrypt_key = self.urlresult['Code'].decode('hex') + else: + raise ADEPTError('Cannot find decryption key.') + + + + V = int_value(param.get('V',2)) + R = int_value(param.get('R')) + + + # genkey method + if V == 1 or V == 2 or V == 4: + self.genkey = self.genkey_v2 + elif V == 3: + self.genkey = self.genkey_v3 + elif V 
>= 5: + self.genkey = self.genkey_v5 + + set_decipher = False + + if V >= 4: + # Check if we need new genkey_v4 - only if we're using AES. + try: + for key in param['CF']: + algo = str(param["CF"][key]["CFM"]) + if algo == "/AESV2": + if V == 4: + self.genkey = self.genkey_v4 + set_decipher = True + self.decipher = self.decrypt_aes + elif algo == "/AESV3": + if V == 4: + self.genkey = self.genkey_v4 + set_decipher = True + self.decipher = self.decrypt_aes + elif algo == "/V2": + set_decipher = True + self.decipher = self.decrypt_rc4 + except: + pass + + # rc4 + if V < 4: + self.decipher = self.decrypt_rc4 # XXX may be AES + # aes + if not set_decipher: + # This should usually already be set by now. + # If it's not, assume that V4 and newer are using AES + if V >= 4: + self.decipher = self.decrypt_aes + self.ready = True + return + + def gen_pw_dialog(self, Username='Username', Password='Password', Title='User/Password Authentication',\ + OK='Proceed', Text1='Authorization', Text2='Enter Required Data'): + self.pwtk = Tkinter.Tk() + self.pwtk.title(Title) + self.pwtk.minsize(150, 0) + self.label1 = Tkinter.Label(self.pwtk, text=Text1) + self.label2 = Tkinter.Label(self.pwtk, text=Text2) + self.label3 = Tkinter.Label(self.pwtk, text=Username) + self.pwfieldreq = 1 + self.gui = True + self.un_entry = Tkinter.Entry(self.pwtk) + # cursor here + self.un_entry.focus() + self.label4 = Tkinter.Label(self.pwtk, text=Password) + self.pw_entry = Tkinter.Entry(self.pwtk, show="*") + self.button = Tkinter.Button(self.pwtk, text=OK, command=self.fo_save_values) + # widget layout, stack vertical + self.label1.pack() + self.label2.pack() + self.label3.pack() + self.un_entry.pack() + # create a password label and field + if self.pwfieldreq == 1: + self.label4.pack() + self.pw_entry.pack() + self.button.pack() + self.pwtk.update() + # start the event loop + self.pwtk.mainloop() + + # genkey functions + def genkey_v2(self, objid, genno): + objid = struct.pack(' -1: + mac = 
line.split()[4] + break + return mac.replace(':','') + except: + raise ADEPTError('Cannot find MAC address. Get forum help.') + + def get_win_macaddress(self): + try: + gasize = c_ulong(5000) + p = create_string_buffer(5000) + GetAdaptersInfo = windll.iphlpapi.GetAdaptersInfo + GetAdaptersInfo(byref(p),byref(gasize)) + return p[0x194:0x19a].encode('hex') + except: + raise ADEPTError('Cannot find MAC address. Get forum help.') + + # custom conversion 5 bytes to 8 chars method + def fo_convert5to8(self, edisk): + # byte to number/char mapping table + darray=[0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x41,0x42,0x43,0x44,0x45,\ + 0x46,0x47,0x48,0x4A,0x4B,0x4C,0x4D,0x4E,0x50,0x51,0x52,0x53,0x54,\ + 0x55,0x56,0x57,0x58,0x59,0x5A] + pdid = struct.pack('> 5 + outputhw = outputhw + chr(darray[index]) + pdid = (ord(edisk[4]) << 2)|pdid + # get the last 2 bits from the hwid + low part of the cpuid + for i in range(0,2): + index = pdid & 0x1f + # shift the disk id 5 bits to the right + pdid = pdid >> 5 + outputhw = outputhw + chr(darray[index]) + return outputhw + + # Linux processing + def fo_linux_sethwids(self): + # linux specific attributes + self.fileopen['OSType']='Linux' + self.fileopen['AcroProduct']='AcroReader' + self.fileopen['AcroReader']='Yes' + self.fileopen['AcroVersion']='9.101' + self.fileopen['FSName']='ext3' + self.fileopen['Build']='878' + self.fileopen['ProdVer']='1.8.5.1' + self.fileopen['OSBuild']='2.6.33' + # write hardware keys + hwkey = 0 + pmac = self.get_macaddress().decode("hex"); + self.fileopen['Disk'] = self.fo_convert5to8(pmac[1:]) + # get primary used default mac address + self.fileopen['Machine'] = self.fo_convert5to8(pmac[1:]) + # get uuid + # check for reversed offline handler 6AB83F4Ah + AFh 6AB83F4Ah + if 'LILA' in self.fileopen: + pass + if 'Ident4ID' in self.fileopen: + self.fileopen['User'] = getpass.getuser() + self.fileopen['SaUser'] = getpass.getuser() + try: + cuser = winreg.HKEY_CURRENT_USER + FOW3_UUID = 'Software\\Fileopen' + 
regkey = winreg.OpenKey(cuser, FOW3_UUID) + userkey = winreg.QueryValueEx(regkey, 'Fowp3Uuid')[0] +# if self.genkey_cryptmach(userkey)[0:4] != 'ec20': + self.fileopen['Uuid'] = self.genkey_cryptmach(userkey)[4:] +## elif self.genkey_cryptmach(userkey)[0:4] != 'ec20': +## self.fileopen['Uuid'] = self.genkey_cryptmach(userkey,1)[4:] +## else: + except: + raise ADEPTError('Cannot find FowP3Uuid file - reason might be Adobe (Reader) X.'\ + 'Read the FAQs for more information how to solve the problem.') + else: + self.fileopen['Uuid'] = str(uuid.uuid1()) + # get time stamp + self.fileopen['Stamp'] = str(time.time())[:-3] + # get fileopen input pdf name + path + self.fileopen['DocPathUrl'] = 'file%3a%2f%2f%2f'\ + + urllib.quote(os.path.normpath(INPUTFILEPATH)) + # clear the link + #INPUTFILEPATH = '' +## # get volume name (urllib quote necessairy?) urllib.quote( +## self.fileopen['VolName'] = win32api.GetVolumeInformation("C:\\")[0] +## # get volume serial number +## self.fileopen['VolSN'] = str(win32api.GetVolumeInformation("C:\\")[1]) + return + + # Windows processing + def fo_win_sethwids(self): + # Windows specific attributes + self.fileopen['OSType']='Windows' + self.fileopen['OSName']='Vista' + self.fileopen['OSData']='Service%20Pack%204' + self.fileopen['AcroProduct']='Reader' + self.fileopen['AcroReader']='Yes' + self.fileopen['OSBuild']='7600' + self.fileopen['AcroVersion']='9.1024' + self.fileopen['Build']='879' + # write hardware keys + hwkey = 0 + # get the os type and save it in ostype + try: + import win32api + import win32security + import win32file + except: + raise ADEPTError('PyWin Extension (Win32API module) needed.\n'+\ + 'Download from http://sourceforge.net/projects/pywin32/files/ ') + try: + import winreg + except ImportError: + import _winreg as winreg + try: + v0 = win32api.GetVolumeInformation('C:\\') + v1 = win32api.GetSystemInfo()[6] + # fix for possible negative integer (Python problem) + volserial = v0[1] & 0xffffffff + lowcpu = v1 & 255 + 
highcpu = (v1 >> 8) & 255 + # changed to int + volserial = struct.pack(' 0 and mode == True: + m.update(key_string[:(13-len(uname))]) + md5sum = m.digest()[0:16] + # print md5sum.encode('hex') + # normal ident4id calculation + retval = [] + for sdata in data: + retval.append(ARC4.new(md5sum).decrypt(sdata)) + for rval in retval: + if rval[:4] == 'ec20': + return rval[4:] + return False + # start normal execution + # list for username variants + unamevars = [] + # fill username variants list + unamevars.append(self.user) + unamevars.append(self.user + chr(0)) + unamevars.append(self.user.lower()) + unamevars.append(self.user.lower() + chr(0)) + unamevars.append(self.user.upper()) + unamevars.append(self.user.upper() + chr(0)) + # go through it + for uname in unamevars: + result = genkeysub(uname, True) + if result != False: + return result + result = genkeysub(uname) + if result != False: + return result + # didn't find it, return false + return False +## raise ADEPTError('Unsupported Ident4D Decryption,\n'+\ +## 'report the bug to the ineptpdf script forum') + + KEYWORD_OBJ = KWD(b'obj') + + def getobj(self, objid): + if not self.ready: + raise PDFException('PDFDocument not initialized') + #assert self.xrefs + if objid in self.objs: + genno = 0 + obj = self.objs[objid] + else: + for xref in self.xrefs: + try: + (stmid, index) = xref.getpos(objid) + break + except KeyError: + pass + else: + #if STRICT: + # raise PDFSyntaxError('Cannot locate objid=%r' % objid) + return None + if stmid: + if gen_xref_stm: + return PDFObjStmRef(objid, stmid, index) + # Stuff from pdfminer: extract objects from object stream + stream = stream_value(self.getobj(stmid)) + if stream.dic.get('Type') is not LITERAL_OBJSTM: + if STRICT: + raise PDFSyntaxError('Not a stream object: %r' % stream) + try: + n = stream.dic['N'] + except KeyError: + if STRICT: + raise PDFSyntaxError('N is not defined: %r' % stream) + n = 0 + + if stmid in self.parsed_objs: + objs = self.parsed_objs[stmid] + else: 
+ parser = PDFObjStrmParser(stream.get_data(), self) + objs = [] + try: + while 1: + (_,obj) = parser.nextobject() + objs.append(obj) + except PSEOF: + pass + self.parsed_objs[stmid] = objs + genno = 0 + i = n*2+index + try: + obj = objs[i] + except IndexError: + raise PDFSyntaxError('Invalid object number: objid=%r' % (objid)) + if isinstance(obj, PDFStream): + obj.set_objid(objid, 0) +### + else: + self.parser.seek(index) + (_,objid1) = self.parser.nexttoken() # objid + (_,genno) = self.parser.nexttoken() # genno + #assert objid1 == objid, (objid, objid1) + (_,kwd) = self.parser.nexttoken() + # #### hack around malformed pdf files + # assert objid1 == objid, (objid, objid1) +## if objid1 != objid: +## x = [] +## while kwd is not self.KEYWORD_OBJ: +## (_,kwd) = self.parser.nexttoken() +## x.append(kwd) +## if x: +## objid1 = x[-2] +## genno = x[-1] +## + if kwd is not self.KEYWORD_OBJ: + raise PDFSyntaxError( + 'Invalid object spec: offset=%r' % index) + (_,obj) = self.parser.nextobject() + if isinstance(obj, PDFStream): + obj.set_objid(objid, genno) + if self.decipher: + obj = decipher_all(self.decipher, objid, genno, obj) + self.objs[objid] = obj + return obj + +# helper class for cookie retrival +class WinBrowserCookie(): + def __init__(self): + pass + def getcookie(self, cname, chost): + # check firefox db + fprofile = os.environ['AppData']+r'\Mozilla\Firefox' + pinifile = 'profiles.ini' + fini = os.path.normpath(fprofile + '\\' + pinifile) + try: + with open(fini,'r') as ffini: + firefoxini = ffini.read() + # Firefox not installed or on an USB stick + except: + return None + for pair in firefoxini.split('\n'): + try: + key, value = pair.split('=',1) + if key == 'Path': + fprofile = os.path.normpath(fprofile+'//'+value+'//'+'cookies.sqlite') + break + # asdf + except: + continue + if os.path.isfile(fprofile): + try: + con = sqlite3.connect(fprofile,1) + except: + raise ADEPTError('Firefox Cookie data base locked. 
Close Firefox and try again') + cur = con.cursor() + try: + cur.execute("select value from moz_cookies where name=? and host=?", (cname, chost)) + except Exception: + raise ADEPTError('Firefox Cookie database is locked. Close Firefox and try again') + try: + return cur.fetchone()[0] + except Exception: + # sometimes is a dot in front of the host + chost = '.'+chost + cur.execute("select value from moz_cookies where name=? and host=?", (cname, chost)) + try: + return cur.fetchone()[0] + except: + return None + +class PDFObjStmRef(object): + maxindex = 0 + def __init__(self, objid, stmid, index): + self.objid = objid + self.stmid = stmid + self.index = index + if index > PDFObjStmRef.maxindex: + PDFObjStmRef.maxindex = index + + +## PDFParser +## +class PDFParser(PSStackParser): + + def __init__(self, doc, fp): + PSStackParser.__init__(self, fp) + self.doc = doc + self.doc.set_parser(self) + return + + def __repr__(self): + return '' + + KEYWORD_R = KWD(b'R') + KEYWORD_ENDOBJ = KWD(b'endobj') + KEYWORD_STREAM = KWD(b'stream') + KEYWORD_XREF = KWD(b'xref') + KEYWORD_STARTXREF = KWD(b'startxref') + def do_keyword(self, pos, token): + if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): + self.add_results(*self.pop(1)) + return + if token is self.KEYWORD_ENDOBJ: + self.add_results(*self.pop(4)) + return + + if token is self.KEYWORD_R: + # reference to indirect object + try: + ((_,objid), (_,genno)) = self.pop(2) + (objid, genno) = (int(objid), int(genno)) + obj = PDFObjRef(self.doc, objid, genno) + self.push((pos, obj)) + except PSSyntaxError: + pass + return + + if token is self.KEYWORD_STREAM: + # stream object + ((_,dic),) = self.pop(1) + dic = dict_value(dic) + try: + objlen = int_value(dic['Length']) + except KeyError: + if STRICT: + raise PDFSyntaxError('/Length is undefined: %r' % dic) + objlen = 0 + self.seek(pos) + try: + (_, line) = self.nextline() # 'stream' + except PSEOF: + if STRICT: + raise PDFSyntaxError('Unexpected EOF') + return + pos += len(line) 
def find_xref(self):
    """Return the offset that follows the last ``startxref`` keyword.

    The lines produced by ``self.revreadlines()`` are scanned in the order
    given (the file read backwards); the most recent non-empty line seen
    before ``startxref`` is taken as the offset.

    Returns:
        The offset as an ``int``.

    Raises:
        PDFNoValidXRef: if no ``startxref`` line is found, or if the value
            recorded for it is missing or not an integer.
    """
    prev = None
    for line in self.revreadlines():
        line = line.strip()
        if line == b'startxref':
            break
        if line:
            prev = line
    else:
        # Ran out of lines without ever seeing the keyword.
        raise PDFNoValidXRef('Unexpected EOF')
    try:
        return int(prev)
    except (TypeError, ValueError):
        # prev is None (nothing followed the keyword) or is not a number.
        # Report it as an invalid xref so that a caller catching
        # PDFNoValidXRef can fall back to rebuilding the table, instead of
        # crashing with an unrelated TypeError/ValueError.
        raise PDFNoValidXRef('Invalid startxref offset: %r' % (prev,))
def dump(self, outf):
    """Write the whole document to ``outf`` as a new PDF file.

    Every object id in ``self.objids`` is fetched from ``self.doc`` and
    written as an indirect object; afterwards the cross-reference data is
    written, either as a classic ``xref`` table plus trailer or, when the
    module-level ``gen_xref_stm`` flag is true, as a cross-reference
    stream. All generation numbers in the output are forced to 0.

    Args:
        outf: writable binary file object; it must support ``write`` and
            ``tell``.
    """
    self.outf = outf
    self.write(self.version)
    self.write(b'\n%\xe2\xe3\xcf\xd3\n')
    doc = self.doc
    objids = self.objids
    xrefs = {}
    maxobj = max(objids)
    trailer = dict(self.trailer)
    trailer['Size'] = maxobj + 1
    for objid in objids:
        obj = doc.getobj(objid)
        if isinstance(obj, PDFObjStmRef):
            # Object lives inside an object stream: remember the locator,
            # nothing is written for it here.
            xrefs[objid] = obj
            continue
        if obj is not None:
            try:
                genno = obj.genno
            except AttributeError:
                genno = 0
            # Record the offset before the object is written.
            xrefs[objid] = (self.tell(), genno)
            self.serialize_indirect(objid, obj)
    startxref = self.tell()

    if not gen_xref_stm:
        self.write(b'xref\n')
        self.write(b'0 %d\n' % (maxobj + 1,))
        for objid in range(0, maxobj + 1):
            if objid in xrefs:
                # force the genno to be 0
                self.write(b"%010d 00000 n \n" % xrefs[objid][0])
            else:
                self.write(b"%010d %05d f \n" % (0, 65535))

        self.write(b'trailer\n')
        self.serialize_object(trailer)
        self.write(b'\nstartxref\n%d\n%%%%EOF' % startxref)

    else:  # Generate crossref stream.

        # Calculate size of entries: fl2 bytes for the second field
        # (offset / stream id), fl3 bytes for the third (index).
        maxoffset = max(startxref, maxobj)
        maxindex = PDFObjStmRef.maxindex
        fl2 = 2
        power = 65536
        while maxoffset >= power:
            fl2 += 1
            power *= 256
        fl3 = 1
        power = 256
        while maxindex >= power:
            fl3 += 1
            power *= 256

        index = []
        first = None
        prev = None
        data = []
        # Put the xrefstream's reference in itself
        startxref = self.tell()
        maxobj += 1
        xrefs[maxobj] = (startxref, 0)
        for objid in sorted(xrefs):
            # Build the /Index array as (first id, count) pairs, one pair
            # per run of consecutive object ids.
            if first is None:
                first = objid
            elif objid != prev + 1:
                index.extend((first, prev - first + 1))
                first = objid
            prev = objid
            objref = xrefs[objid]
            if isinstance(objref, PDFObjStmRef):
                f1 = 2
                f2 = objref.stmid
                f3 = objref.index
            else:
                f1 = 1
                f2 = objref[0]
                # we force all generation numbers to be 0
                # f3 = objref[1]
                f3 = 0

            data.append(struct.pack('>B', f1))
            data.append(struct.pack('>L', f2)[-fl2:])
            data.append(struct.pack('>L', f3)[-fl3:])
        index.extend((first, prev - first + 1))
        # struct.pack() yields bytes, so the pieces must be joined with a
        # bytes separator; ''.join(...) raises TypeError on Python 3.
        data = zlib.compress(b''.join(data))
        dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
               'W': [1, fl2, fl3], 'Length': len(data),
               'Filter': LITERALS_FLATE_DECODE[0],
               'Root': trailer['Root'],}
        if 'Info' in trailer:
            dic['Info'] = trailer['Info']
        xrefstm = PDFStream(dic, data)
        self.serialize_indirect(maxobj, xrefstm)
        self.write(b'startxref\n%d\n%%%%EOF' % startxref)
def escape_string(self, string):
    """Escape a byte string for use inside a PDF literal string ``( ... )``.

    Backslashes are doubled first so that the escapes added afterwards are
    not escaped again. Line feeds, carriage returns and both parentheses
    are then replaced by their backslash escapes.

    Args:
        string: ``bytes`` (or ``bytearray``) to escape.

    Returns:
        The escaped byte string.
    """
    string = string.replace(b'\\', b'\\\\')
    string = string.replace(b'\n', b'\\n')
    # A raw CR inside a literal string is read back as LF by PDF consumers
    # (end-of-line normalisation), so it has to be escaped as well or the
    # byte value 0x0D is lost.
    string = string.replace(b'\r', b'\\r')
    string = string.replace(b'(', b'\\(')
    string = string.replace(b')', b'\\)')
    return string
def cli_main(argv=sys.argv):
    """Command-line entry point: decrypt one PDF file.

    Args:
        argv: argument list in ``sys.argv`` form, i.e.
            ``[program, KEYFILE, INBOOK, OUTBOOK]``.

    Returns:
        0 after the output file has been written; 1 when the ``RSA``
        module is unavailable or the argument count is wrong (a message is
        printed in both cases).
    """
    progname = os.path.basename(argv[0])
    if RSA is None:
        # print() call instead of the Python 2 print statement, which is a
        # SyntaxError on Python 3 and would make the whole file unloadable.
        print("%s: This script requires PyCrypto, which must be installed "
              "separately. Read the top-of-script comment for details."
              % (progname,))
        return 1
    if len(argv) != 4:
        print("usage: %s KEYFILE INBOOK OUTBOOK" % (progname,))
        return 1
    keypath, inpath, outpath = argv[1:]
    with open(inpath, 'rb') as inf:
        serializer = PDFSerializer(inf, keypath)
        # hope this will fix the 'bad file descriptor' problem
        with open(outpath, 'wb') as outf:
            # help construct to make sure the method runs to the end
            serializer.dump(outf)
    return 0
def decrypt(self):
    """Validate the dialog fields and run the decryption.

    Reads the key/password, input and output entries of the dialog,
    reports problems through ``self.status['text']`` and otherwise calls
    ``cli_main`` with an argv-style list built from those values.

    Module globals written: ``KEYFILEPATH`` and ``PASSWORD`` (only when the
    key entry is empty or does not name an existing file) and
    ``INPUTFILEPATH`` (UTF-8 encoded input path).
    """
    global INPUTFILEPATH
    global KEYFILEPATH
    global PASSWORD
    keypath = self.keypath.get()
    inpath = self.inpath.get()
    outpath = self.outpath.get()
    if not keypath or not os.path.exists(keypath):
        # keyfile doesn't exist: treat the entry text as a password
        KEYFILEPATH = False
        PASSWORD = keypath
    if not inpath or not os.path.exists(inpath):
        self.status['text'] = 'Specified input file does not exist'
        return
    if not outpath:
        self.status['text'] = 'Output file not specified'
        return
    if inpath == outpath:
        self.status['text'] = 'Must have different input and output files'
        return
    # patch for non-ascii characters
    INPUTFILEPATH = inpath.encode('utf-8')
    argv = [sys.argv[0], keypath, inpath, outpath]
    self.status['text'] = 'Processing ...'
    try:
        cli_main(argv)
    # "except Exception, a" is Python 2-only syntax and a SyntaxError on
    # Python 3; the "as" form is accepted by Python 2.6+ and Python 3.
    except Exception as a:
        self.status['text'] = 'Error: ' + str(a)
        return
    self.status['text'] = 'File successfully decrypted.\n'+\
        'Close this window or decrypt another pdf file.'
    return