Package common :: Module pdf_ext
[frames | no frames]

Source Code for Module common.pdf_ext

  1  # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. 
  2  # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr 
  3  # 
  4  # This file is part of logilab-common. 
  5  # 
  6  # logilab-common is free software: you can redistribute it and/or modify it under 
  7  # the terms of the GNU Lesser General Public License as published by the Free 
  8  # Software Foundation, either version 2.1 of the License, or (at your option) any 
  9  # later version. 
 10  # 
 11  # logilab-common is distributed in the hope that it will be useful, but WITHOUT 
 12  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
 13  # FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more 
 14  # details. 
 15  # 
 16  # You should have received a copy of the GNU Lesser General Public License along 
 17  # with logilab-common.  If not, see <http://www.gnu.org/licenses/>. 
 18  """Manipulate pdf and fdf files (pdftk recommended). 
 19   
 20  Notes regarding pdftk, pdf forms and fdf files (Forms Data Format): 
 21  field names can be extracted with: 
 22   
 23      pdftk orig.pdf generate_fdf output truc.fdf 
 24   
 25  to merge fdf and pdf: 
 26   
 27      pdftk orig.pdf fill_form test.fdf output result.pdf [flatten] 
 28   
 29  without flatten, one could further edit the resulting form. 
 30  with flatten, everything is turned into text. 
 31   
 32   
 33   
 34   
 35  """ 
 36  __docformat__ = "restructuredtext en" 
 37  # XXX seems very unix specific 
 38  # TODO: check availability of pdftk at import 
 39   
 40   
import os
import shutil
import subprocess
import tempfile
 42   
 43  HEAD="""%FDF-1.2 
 44  %\xE2\xE3\xCF\xD3 
 45  1 0 obj 
 46  << 
 47  /FDF 
 48  << 
 49  /Fields [ 
 50  """ 
 51   
 52  TAIL="""] 
 53  >> 
 54  >> 
 55  endobj 
 56  trailer 
 57   
 58  << 
 59  /Root 1 0 R 
 60  >> 
 61  %%EOF 
 62  """ 
 63   
def output_field(f):
    """Return *f* encoded the way FDF field names are written by this module.

    The result starts with the two characters ``\\xfe\\xff`` (the UTF-16
    big-endian byte order mark) and is followed by every item of *f*
    preceded by a ``\\x00`` character.

    :param f: the field name, iterated item by item
    :return: the marker followed by the NUL-prefixed items of *f*
    """
    byte_order_mark = "\xfe\xff"
    nul_prefixed = ("\x00" + char for char in f)
    return byte_order_mark + "".join(nul_prefixed)
66
def extract_keys(lines):
    """Return the field names found in the lines of an FDF file.

    Only lines starting with ``/T`` contribute a name; every other line
    (including the ``/V`` value lines) is ignored.

    For each ``/T`` line the first seven and the last two characters are
    dropped and the remaining ``\\x00`` characters are removed.  For a line
    shaped like the ``/T (...)`` entries emitted by :func:`write_field`
    this strips the ``/T (`` prefix, the two-character byte order mark,
    the first NUL, and the closing ``)`` plus newline.

    :param lines: iterable of text lines read from an FDF file
    :return: list of field names, in the order they were encountered
    """
    return [
        entry[7:-2].replace('\x00', '')
        for entry in lines
        if entry.startswith('/T')
    ]
77
def write_field(out, key, value):
    """Write one FDF field dictionary to *out*.

    The entry consists of an opening ``<<``, a ``/V`` value line, a ``/T``
    name line (the name being encoded by :func:`output_field`) and a
    closing ``>>``.

    :param out: object with a ``write`` method receiving the text
    :param key: field name
    :param value: field value; a falsy value is written as ``/V /``
    """
    out.write("<<\n")
    # A falsy value gets the bare "/" placeholder instead of a "(...)" string.
    value_line = ("/V (%s)\n" % value) if value else "/V /\n"
    out.write(value_line)
    name_line = "/T (%s)\n" % output_field(key)
    out.write(name_line)
    out.write(">> \n")
86
def write_fields(out, fields):
    """Write a complete FDF document describing *fields* to *out*.

    ``HEAD`` is written first, then two entries per field, then ``TAIL``.

    :param out: object with a ``write`` method receiving the text
    :param fields: iterable of ``(key, value, comment)`` triples; the
        comment is unpacked but not used
    """
    out.write(HEAD)
    for name, content, _comment in fields:
        write_field(out, name, content)
        # Same value under the "a"-suffixed name: carbon copy on other pages.
        carbon_copy_name = name + "a"
        write_field(out, carbon_copy_name, content)
    out.write(TAIL)
93
def extract_keys_from_pdf(filename):
    """Return the form field names found in the PDF file *filename*.

    Runs ``pdftk <filename> generate_fdf output <tmp>/fields.fdf`` and
    parses the generated FDF file with :func:`extract_keys`.

    The FDF file lives in a private temporary directory that is removed
    before returning, so concurrent calls do not overwrite each other's
    output and a stale file from an earlier run is never parsed.  pdftk is
    started from an argument list, without a shell, so *filename* may
    contain spaces or shell metacharacters.

    :param filename: path of the PDF form to inspect
    :return: list of field names, in the order found in the FDF file
    :raise OSError: if pdftk cannot be started, or if it did not produce
        the FDF file (its exit status itself is not inspected)
    """
    # what about using 'pdftk filename dump_data_fields' and parsing the output ?
    workdir = tempfile.mkdtemp(prefix='pdf_ext-')
    try:
        fdf_path = os.path.join(workdir, 'fields.fdf')
        subprocess.call(['pdftk', filename, 'generate_fdf', 'output', fdf_path])
        # latin-1 maps every byte to the character with the same code point,
        # which is what extract_keys expects ("\x00" separators, fixed
        # offsets); newline='\n' keeps lines split on "\n" only.
        with open(fdf_path, encoding='latin-1', newline='\n') as stream:
            lines = stream.readlines()
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
    return extract_keys(lines)
99 100
def fill_pdf(infile, outfile, fields):
    """Fill the PDF form *infile* with *fields* and write it to *outfile*.

    The fields are serialised to an FDF file with :func:`write_fields`,
    then ``pdftk <infile> fill_form <fdf> output <outfile> flatten`` is
    run.

    The FDF file is written inside a private temporary directory and is
    explicitly closed before pdftk starts, so pdftk always sees the
    complete file.  The directory is removed afterwards.  pdftk is started
    from an argument list, without a shell, so the paths may contain
    spaces or shell metacharacters.

    :param infile: path of the PDF form to fill
    :param outfile: path of the PDF file to create
    :param fields: iterable of ``(key, value, comment)`` triples, as
        accepted by :func:`write_fields`
    :raise OSError: if pdftk cannot be started (its exit status itself is
        not inspected)
    :raise UnicodeEncodeError: if a key or value contains a character
        above U+00FF, which cannot be written as a single byte
    """
    workdir = tempfile.mkdtemp(prefix='pdf_ext-')
    try:
        fdf_path = os.path.join(workdir, 'fields.fdf')
        # latin-1 writes each character as the byte with the same code
        # point, so the "\xfe\xff" / "\x00" markers reach the file
        # unchanged; newline='\n' disables newline translation.
        with open(fdf_path, 'w', encoding='latin-1', newline='\n') as stream:
            write_fields(stream, fields)
        subprocess.call(['pdftk', infile, 'fill_form', fdf_path,
                         'output', outfile, 'flatten'])
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
104
def testfill_pdf(infile, outfile):
    """Fill every field of *infile* with its own name and write *outfile*.

    The field names are read with :func:`extract_keys_from_pdf`; each one
    becomes a ``(name, name, '')`` triple handed to :func:`fill_pdf`.

    :param infile: path of the PDF form to read and fill
    :param outfile: path of the filled PDF file to create
    """
    fields = [(name, name, '') for name in extract_keys_from_pdf(infile)]
    fill_pdf(infile, outfile, fields)
111