Package logilab :: Package common :: Module dbf
[frames] | no frames]

Source Code for Module logilab.common.dbf

  1  # -*- coding: utf-8 -*- 
  2  """ 
  3  This is a DBF reader which reads Visual Fox Pro DBF format with Memo field. 
  4   
  5  Usage: 
  6      rec = readDbf('test.dbf') 
  7      for line in rec: 
  8          print line['name'] 
  9   
 10  @author Yusdi Santoso 
 11  @date 13/07/2007 
 12  http://www.physics.ox.ac.uk/users/santoso/Software.Repository.html 
 13  page says code is "available as is without any warranty or support". 
 14  """ 
 15   
 16  import struct 
 17  import os, os.path 
 18  import sys 
 19  import csv 
 20  import tempfile 
 21  import ConfigParser 
 22   
class Dbase:
    """Reader for a DBF table and, when present, its companion memo file.

    Typical use::

        db = Dbase()
        db.open('table.dbf')
        for i in range(db.get_numrecords()):
            row = db.get_record_with_names(i)
        db.close()

    Field names and field types are exposed as native ``str`` objects.
    Field values are the raw byte strings read from the file (``str`` on
    Python 2, ``bytes`` on Python 3); no character decoding is attempted.
    """

    # Field types whose cell holds a block pointer into the memo file.
    _MEMO_TYPES = ('M', 'G', 'B', 'P')
    # Size in bytes of one field descriptor in the table header.
    _DESCRIPTOR_LEN = 32

    def __init__(self):
        self.fdb = None             # file object of the .dbf file
        self.fmemo = None           # file object of the memo file, if any
        self.db_data = None         # raw bytes of all records
        self.memo_data = None       # raw bytes of the whole memo file
        self.fields = None          # {1-based position: field descriptor}
        self.num_records = 0
        self.header = None          # decoded table header
        self.memo_file = ''         # path of the memo file, '' if none
        self.memo_header = None     # decoded memo header
        self.memo_block_size = 0
        self.memo_header_len = 0
        self.row_format = '<'       # struct format of one record
        self.row_ids = []           # field names, in column order
        self.row_len = 0            # summed length of all fields

    @staticmethod
    def _to_native_str(raw):
        """Return *raw* as the interpreter's native ``str`` type.

        On Python 2 byte strings already are ``str`` and are returned
        untouched; on Python 3 they are decoded as latin-1, which maps
        every byte value to a character and therefore cannot fail.
        """
        if isinstance(raw, str):
            return raw
        return raw.decode('latin-1')

    def _drop_after_NULL(self, txt):
        """Return *txt* truncated just before its first NUL character."""
        null = b'\0' if isinstance(txt, bytes) else u'\0'
        return txt.split(null, 1)[0]

    def _reverse_endian(self, num):
        """Decode *num* (4 bytes, little-endian) as an unsigned integer.

        An empty string decodes to 0. Any other length that is not 4
        raises ``struct.error``.
        """
        if not len(num):
            return 0
        return struct.unpack('<L', num)[0]

    def _assign_ids(self, lst, ids):
        """Return a dict pairing each name in *ids* with the value at the
        same position in *lst*."""
        return dict(zip(ids, lst))

    def open(self, db_name):
        """Open the table *db_name* and load its header, fields and records.

        A memo file is looked up next to the table by replacing the last
        character of *db_name* with ``t`` and, failing that, its last three
        characters with ``fpt``.

        Raises ``IOError`` when the file is 68 bytes or smaller. If anything
        fails after the table file was opened, every handle opened so far is
        closed before the exception propagates.
        """
        filesize = os.path.getsize(db_name)
        if filesize <= 68:
            raise IOError('The file is not large enough to be a dbf file')

        self.fdb = open(db_name, 'rb')
        try:
            self._load_memo(db_name)
            self._load_header()
            self._load_fields()
            self._load_rows()
        except Exception:
            self.close()
            raise

    def _load_memo(self, db_name):
        """Locate the memo file of *db_name* and read it, if there is one."""
        self.memo_file = ''
        for candidate in (db_name[0:-1] + 't', db_name[0:-3] + 'fpt'):
            if os.path.isfile(candidate):
                self.memo_file = candidate
                break
        if not self.memo_file:
            return

        self.fmemo = open(self.memo_file, 'rb')
        self.memo_data = self.fmemo.read()
        # The block size is the big-endian 16-bit value at bytes 6-7.
        self.memo_header = self._assign_ids(
            struct.unpack('>6x1H', self.memo_data[:8]), ['Block size'])
        # A stored block size of 0 falls back to 512.
        block_size = self.memo_header['Block size'] or 512
        self.memo_block_size = block_size
        self.memo_header_len = block_size

    def _load_header(self):
        """Decode the 32-byte table header."""
        data = self.fdb.read(32)
        self.header = self._assign_ids(
            struct.unpack('<B 3B L 2H 20x', data),
            ['id', 'Year', 'Month', 'Day', '# of Records', 'Header Size',
             'Record Size'])
        self.header['id'] = hex(self.header['id'])
        self.num_records = self.header['# of Records']

    def _load_fields(self):
        """Decode the field descriptors that follow the table header."""
        data = self.fdb.read(self.header['Header Size'] - 34)
        ids = ['Field Name', 'Field Type', 'Field Length', 'Field Precision']
        size = self._DESCRIPTOR_LEN
        self.fields = {}
        for position, offset in enumerate(range(0, len(data), size), 1):
            # A carriage return in place of a field name ends the list.
            # Slicing (not indexing) yields a byte string on Python 2 and 3.
            if data[offset:offset + 1] == b'\r':
                break
            chunk = data[offset:offset + size]
            if len(chunk) < size:
                # The final descriptor may have been read short; the missing
                # bytes fall in the ignored tail of the pattern.
                chunk += b' ' * (size - len(chunk))
            field = self._assign_ids(
                struct.unpack('<11s c 4x B B 14x', chunk), ids)
            field['Field Name'] = self._to_native_str(
                self._drop_after_NULL(field['Field Name']))
            field['Field Type'] = self._to_native_str(field['Field Type'])
            self.fields[position] = field

    def _load_rows(self):
        """Read the record area and build the per-record struct format."""
        # Skip the rest of the header plus the first byte of the first
        # record, so that record k starts at k * 'Record Size' in db_data.
        self.fdb.read(3)
        if self.header['# of Records']:
            data_size = (self.header['# of Records']
                         * self.header['Record Size']) - 1
            self.db_data = self.fdb.read(data_size)
        else:
            self.db_data = b''

        # Walk the fields in column order explicitly: the unpack format
        # must not depend on dict iteration order.
        columns = [self.fields[key] for key in sorted(self.fields)]
        self.row_format = '<' + ''.join(
            '%ds ' % column['Field Length'] for column in columns)
        self.row_ids = [column['Field Name'] for column in columns]
        self.row_len = sum(column['Field Length'] for column in columns)

    def close(self):
        """Close the table file and the memo file if they were opened."""
        if self.fdb:
            self.fdb.close()
        if self.fmemo:
            self.fmemo.close()

    def get_numrecords(self):
        """Return the record count announced by the table header."""
        return self.num_records

    def get_record_with_names(self, rec_no):
        """Return record *rec_no* as a ``{field name: value}`` dict.

        *rec_no* runs from 0 to N-1; anything else raises ``IndexError``.

        When the table has a memo file, memo-type fields ('M', 'G', 'B',
        'P') are replaced by the stripped memo text they point to (an empty
        string for a null pointer) and all other fields are stripped.
        Without a memo file the raw, unstripped field values are returned.
        """
        if rec_no < 0 or rec_no >= self.num_records:
            raise IndexError('Unable to extract data outside the range')

        offset = self.header['Record Size'] * rec_no
        data = self.db_data[offset:offset + self.row_len]
        record = self._assign_ids(
            struct.unpack(self.row_format, data), self.row_ids)

        if self.memo_file:
            for key in sorted(self.fields):
                field = self.fields[key]
                f_name = field['Field Name']
                c_data = record[f_name]

                if field['Field Type'] in self._MEMO_TYPES:
                    pointer = self._reverse_endian(c_data)
                    if pointer:
                        record[f_name] = self.read_memo(pointer - 1).strip()
                    else:
                        # Null pointer: no memo attached to this record.
                        record[f_name] = c_data[:0]
                else:
                    record[f_name] = c_data.strip()
        return record

    def read_memo_record(self, num, in_length):
        """Read up to *in_length* bytes from the start of memo block *num*.

        A negative *in_length* means one full block; 0 returns an empty
        string. Blocks are counted from the end of the memo header.
        """
        if in_length < 0:
            in_length = self.memo_block_size

        offset = self.memo_header_len + num * self.memo_block_size
        self.fmemo.seek(offset)
        if in_length == 0:
            return b''
        return self.fmemo.read(in_length)

    def read_memo(self, num):
        """Return the payload of the memo that starts at block *num*.

        The block begins with an 8-byte header whose last four bytes give
        the payload length (big-endian). An empty string is returned when
        the header cannot be read in full or when the continuation of a
        memo longer than one block is missing.
        """
        block = self.read_memo_record(num, -1)
        if len(block) < 8:
            return b''
        length = struct.unpack('>L', block[4:4 + 4])[0] + 8

        block_size = self.memo_block_size
        # '<=': a memo that exactly fills the block needs no continuation.
        if length <= block_size:
            return block[8:length]
        rest_data = self.read_memo_record(num + 1, length - block_size)
        if len(rest_data) <= 0:
            return b''
        return block[8:] + rest_data
189
def readDbf(filename):
    """
    Read the DBF file specified by the filename and
    return the records as a list of dictionary.

    The table (and its memo file, if any) is closed before returning,
    including when opening it or reading a record raises.

    @param filename File name of the DBF
    @return List of rows
    """
    db = Dbase()
    try:
        db.open(filename)
        return [db.get_record_with_names(i)
                for i in range(db.get_numrecords())]
    finally:
        # close() only touches handles that were actually opened, so it is
        # safe to call even when open() itself failed.
        db.close()
if __name__ == '__main__':
    # Demo: print the genus and species columns of a sample table.
    rec = readDbf('dbf/sptable.dbf')
    for line in rec:
        genus = line['GENUS'].strip()
        species = line['SPECIES'].strip()
        print('%s %s' % (genus, species))