1
2 """
3 This is a DBF reader which reads Visual Fox Pro DBF format with Memo field.
4
5 Usage:
6 rec = readDbf('test.dbf')
7 for line in rec:
8 print line['name']
9
10 @author Yusdi Santoso
11 @date 13/07/2007
12 http://www.physics.ox.ac.uk/users/santoso/Software.Repository.html
13 page says code is "available as is without any warranty or support".
14 """
15
16 import struct
17 import os, os.path
18 import sys
19 import csv
20 import tempfile
21 import ConfigParser
22
class Dbase:
    """Reader for DBF tables with an optional memo (.dbt / .fpt) side file.

    Typical use::

        db = Dbase()
        db.open('table.dbf')
        for i in range(db.get_numrecords()):
            row = db.get_record_with_names(i)
        db.close()

    Field names and field types are exposed as native ``str`` values on both
    Python 2 and Python 3.  Record values are returned as raw byte strings
    (``str`` on Python 2, ``bytes`` on Python 3); no decoding or numeric
    conversion is attempted.
    """

    # Field types whose record slot holds a block number into the memo file.
    # NOTE(review): the slot is decoded as a 4-byte integer, so memo-typed
    # fields of any other width raise struct.error -- confirm the tables in
    # use only carry 4-byte memo pointers.
    _MEMO_TYPES = ('M', 'G', 'B', 'P')

    def __init__(self):
        """Create a reader that is not yet attached to any file."""
        self.fdb = None            # open handle on the .dbf file
        self.fmemo = None          # open handle on the memo file, if one exists
        self.db_data = None        # raw bytes of the record area
        self.memo_data = None      # full contents of the memo file
        self.fields = None         # {1-based index: field descriptor dict}
        self.num_records = 0
        self.header = None         # parsed 32-byte table header
        self.memo_file = ''        # path of the memo file ('' when absent)
        self.memo_header = None
        self.memo_block_size = 0
        self.memo_header_len = 0
        self.row_format = '<'      # struct format of one record's fields
        self.row_ids = []          # field names, in record order
        self.row_len = 0           # summed field lengths of one record

    @staticmethod
    def _native(raw):
        """Return *raw* as the interpreter's native ``str`` type."""
        if isinstance(raw, str):
            return raw  # Python 2: byte strings already are ``str``
        return raw.decode('latin-1')  # latin-1 maps every byte, never fails

    def _drop_after_NULL(self, txt):
        """Return the part of byte string *txt* before its first NUL byte.

        *txt* is returned whole when it contains no NUL.
        """
        return txt.split(b'\0', 1)[0]

    def _reverse_endian(self, num):
        """Decode the 4-byte little-endian unsigned integer in *num*.

        An empty *num* yields 0.  Any other length that is not exactly
        four bytes raises ``struct.error``.
        """
        if not len(num):
            return 0
        return struct.unpack('<L', num)[0]

    def _assign_ids(self, lst, ids):
        """Pair each value of *lst* with the name at the same position in *ids*.

        @return dict mapping name -> value
        """
        return dict(zip(ids, lst))

    def open(self, db_name):
        """Open the table *db_name* and load its header and record area.

        A memo file is looked for next to the table: first the table path
        with its last character replaced by ``t``, then the path with its
        last three characters replaced by ``fpt``.

        @param db_name Path of the DBF file
        @raise IOError if the file is 68 bytes or smaller
        """
        filesize = os.path.getsize(db_name)
        if filesize <= 68:
            raise IOError('The file is not large enough to be a dbf file')

        self.fdb = open(db_name, 'rb')
        try:
            self._open_memo(db_name)
            self._read_header()
            self._read_rows()
        except Exception:
            # Do not leave file handles dangling when the file is malformed.
            self.close()
            raise

    def _open_memo(self, db_name):
        """Locate and load the memo file belonging to *db_name*, if any."""
        self.memo_file = ''
        for candidate in (db_name[0:-1] + 't', db_name[0:-3] + 'fpt'):
            if os.path.isfile(candidate):
                self.memo_file = candidate
                break
        if not self.memo_file:
            return

        self.fmemo = open(self.memo_file, 'rb')
        self.memo_data = self.fmemo.read()
        # The block size is a big-endian 16-bit value at bytes 6-7.
        self.memo_header = self._assign_ids(
            struct.unpack('>6x1H', self.memo_data[:8]), ['Block size'])
        block_size = self.memo_header['Block size'] or 512  # 0 -> default 512
        self.memo_block_size = block_size
        self.memo_header_len = block_size

    def _read_header(self):
        """Parse the 32-byte table header and the field descriptors."""
        data = self.fdb.read(32)
        self.header = self._assign_ids(
            struct.unpack('<B 3B L 2H 20x', data),
            ['id', 'Year', 'Month', 'Day', '# of Records', 'Header Size',
             'Record Size'])
        self.header['id'] = hex(self.header['id'])
        self.num_records = self.header['# of Records']

        data = self.fdb.read(self.header['Header Size'] - 34)
        header_pattern = '<11s c 4x B B 14x'
        ids = ['Field Name', 'Field Type', 'Field Length', 'Field Precision']
        pattern_len = 32
        self.fields = {}
        for offset in range(0, len(data), pattern_len):
            # A 0x0D byte where a descriptor would start ends the list.
            if data[offset:offset + 1] == b'\r':
                break
            chunk = data[offset:offset + pattern_len]
            if len(chunk) < pattern_len:
                # The last descriptor can come up short because of the
                # "- 34" above; the missing bytes fall in ignored padding.
                chunk += b' ' * (pattern_len - len(chunk))
            field = self._assign_ids(struct.unpack(header_pattern, chunk), ids)
            field['Field Name'] = self._native(
                self._drop_after_NULL(field['Field Name']))
            field['Field Type'] = self._native(field['Field Type'])
            self.fields[len(self.fields) + 1] = field  # keys are 1-based

    def _read_rows(self):
        """Load the record area and build the per-record struct format."""
        # Together with the reads in _read_header this leaves the file one
        # byte past 'Header Size', i.e. just after the first record's leading
        # flag byte (the deletion flag in the DBF layout).  Record k's field
        # data therefore starts at k * 'Record Size' inside db_data.
        self.fdb.read(3)
        if self.header['# of Records']:
            # "- 1": the first record's flag byte was consumed above.
            data_size = (self.header['# of Records']
                         * self.header['Record Size']) - 1
            self.db_data = self.fdb.read(data_size)
        else:
            self.db_data = b''

        ordered = [self.fields[key] for key in sorted(self.fields)]
        self.row_format = '<' + ''.join(
            '%ds ' % field['Field Length'] for field in ordered)
        self.row_ids = [field['Field Name'] for field in ordered]
        self.row_len = sum(field['Field Length'] for field in ordered)

    def close(self):
        """Close the table file and the memo file if they were opened."""
        if self.fdb:
            self.fdb.close()
        if self.fmemo:
            self.fmemo.close()

    def get_numrecords(self):
        """Return the record count stored in the table header."""
        return self.num_records

    def get_record_with_names(self, rec_no):
        """Return record *rec_no* as a dict mapping field name -> raw value.

        This function accepts record numbers from 0 to N-1.

        When a memo file was found by open(), memo-typed fields are replaced
        by the stripped memo text (an empty byte string when the field holds
        block number 0) and every other field is stripped of surrounding
        whitespace.  Without a memo file the values are returned exactly as
        stored.

        @param rec_no Zero-based record number
        @raise IndexError if rec_no is outside 0..N-1
        """
        if rec_no < 0 or rec_no >= self.num_records:
            raise IndexError('Unable to extract data outside the range')

        offset = self.header['Record Size'] * rec_no
        data = self.db_data[offset:offset + self.row_len]
        record = self._assign_ids(
            struct.unpack(self.row_format, data), self.row_ids)

        if self.memo_file:
            for key in sorted(self.fields):
                field = self.fields[key]
                f_name = field['Field Name']
                c_data = record[f_name]
                if field['Field Type'] in self._MEMO_TYPES:
                    block_no = self._reverse_endian(c_data)
                    if block_no:
                        record[f_name] = self.read_memo(block_no - 1).strip()
                    else:
                        # Block number 0: no memo stored for this record.
                        record[f_name] = b''
                else:
                    record[f_name] = c_data.strip()
        return record

    def read_memo_record(self, num, in_length):
        """Read raw bytes from the memo file starting at block *num*.

        Block *num* starts at ``memo_header_len + num * memo_block_size``.

        @param num Zero-based block index counted after the header block
        @param in_length Number of bytes to read; a negative value reads one
               block, 0 reads nothing
        @return The bytes read (possibly fewer than requested at end of file)
        """
        if in_length < 0:
            in_length = self.memo_block_size

        self.fmemo.seek(self.memo_header_len + num * self.memo_block_size)
        if in_length == 0:
            return b''
        return self.fmemo.read(in_length)

    def read_memo(self, num):
        """Return the payload of the memo that starts at block *num*.

        Each memo begins with an 8-byte header whose bytes 4-7 hold the
        payload length as a big-endian unsigned integer.

        @return The payload, or an empty byte string when the block is
                missing, shorter than its header, or its continuation
                cannot be read
        """
        head = self.read_memo_record(num, -1)
        if len(head) < 8:
            return b''
        length = struct.unpack('>L', head[4:8])[0] + 8  # header + payload

        block_size = self.memo_block_size
        if length <= block_size:
            # The whole memo fits in the block already read, including the
            # case where it fills that block exactly.
            return head[8:length]
        rest_data = self.read_memo_record(num + 1, length - block_size)
        if len(rest_data) <= 0:
            return b''
        return head[8:] + rest_data
189
def readDbf(filename):
    """
    Read the DBF file specified by the filename and
    return the records as a list of dictionaries.

    The table (and its memo file, if one was opened) is closed before
    returning, including when opening or reading a record raises.

    @param filename File name of the DBF
    @return List of rows, one dictionary per record
    """
    db = Dbase()
    try:
        db.open(filename)
        return [db.get_record_with_names(i)
                for i in range(db.get_numrecords())]
    finally:
        # close() only touches handles that were actually opened, so it is
        # safe to call even when open() itself failed.
        db.close()
206
if __name__ == '__main__':
    # Demo: list the genus and species columns of a sample table.
    # print(...) with a single pre-formatted argument behaves the same on
    # Python 2 and Python 3.
    for line in readDbf('dbf/sptable.dbf'):
        print('%s %s' % (line['GENUS'].strip(), line['SPECIES'].strip()))
211