I have a text file, I would like to extract the lines between starting (Socket:) and ending keywords (Socket:), then do a processing.
Input:
Socket: 1
Device ID: 0x0B028041 0xCC344007 0x10000834 0x00000011
CB: 3/ID: 0x445DDC13
BIBID: 0x65C
A:0xB0000190 D:0x310020FF
A:0xB0000194 D:0x00000000
A:0xB0000198 D:0x31002010
A:0xB000019C D:0x00000017
A:0xB00001A0 D:0x31002020
A:0xB00001A4 D:0x00000017
A:0xB00001A8 D:0x31002040
A:0xB00001AC D:0x00000000
A:0xB00001B0 D:0x31001000
ART: 0xB0000800 DRT: 0xB0000000
ART: 0xB0000804 DRT: 0xB0000000
ART: 0xB0000808 DRT: 0xB0000000
ART: 0xB000080C DRT: 0xB0000000
ART: 0xB0000810 DRT: 0xB0000000
ART: 0xB0000814 DRT: 0xB0000000
ART: 0xB0000818 DRT: 0xB0000000
ART: 0xB000081C DRT: 0xB0000000
ART: 0xB0000820 DRT: 0xB0000000
ART: 0xB0000824 DRT: 0xB0000000
ART: 0xB0000828 DRT: 0xB0000000
ART: 0xB000082C DRT: 0xB0000000
ART: 0xB0000830 DRT: 0xB0000000
ART: 0xB0000834 DRT: 0xB0000000
ART: 0xB0000838 DRT: 0xB0000000
ART: 0xB000083C DRT: 0xB0000000
ART: 0xB0000840 DRT: 0xB0000000
ART: 0xB0000844 DRT: 0xB0000000
ART: 0xB0000848 DRT: 0xB0000000
ART: 0xB000084C DRT: 0xB0000000
ART: 0xB0000850 DRT: 0xB0000000
ART: 0xB0000854 DRT: 0xB0000000
ART: 0xB0000858 DRT: 0xB0000000
ART: 0xB000085C DRT: 0xB0000000
ART: 0xB0000860 DRT: 0xB0000000
ART: 0xB0000864 DRT: 0xB0000000
ART: 0xB0000868 DRT: 0xB0000000
ART: 0xB000086C DRT: 0xB0000000
ART: 0xB0000870 DRT: 0xB0000000
ART: 0xB0000874 DRT: 0xB0000000
ART: 0xB0000878 DRT: 0xB0000000
ART: 0xB000087C DRT: 0xB0000000
...
Socket:2
...
Current Code:
import re
from collections import defaultdict
dict2=defaultdict(list)
dict3=defaultdict(list)
socket_position_status = False
dev_id_status = False
CB_noS_status = False
trf_val_flag = False
dict1=defaultdict(list)
pattern="QWL"
rd_case= "Digital"
setup_temp = "0C"
readout_temp = "0C"
address=[]
def tde_file():
with open(r'C:\Gert_batch file\DOE_parsing\Thebe\DOE 4 - 5K\NEW SFR\PF\tde\MRB_QWL_0c_Digital_PS60c_TC1798.tde', 'rb') as f:
for line in f:
pattern_tde = ":TEST_RESULT"
if pattern_tde in line:
tde_addr = ':TEST_RESULT (\d+); addr: ([0-9A-Za-z]{10})'
x0 =re.match(tde_addr, line)
if x0:
address_tde = x0.group(2)
tde_addr_1 = 'cfp_(vqs|vcs)_m(\d+) \// HB05_SB255'
x1 = re.search(tde_addr_1,line)
tde_addr_2 = 'cfp_(vqs|vcs)_m\dm\d_(vth\d.\d) \// HB05_SB255'
x2 = re.search(tde_addr_2,line)
tde_addrs_1 = '(DTS_)value_(before|after)_test_(start|finish)'
y0 = re.search(tde_addrs_1,line)
if x1:
hlp_s = x1.group(2).zfill(2)
identifier = x1.group(1)+"_m"+hlp_s
if x2:
identifier = x2.group(1)+"_m"+x2.group(2)
try:
tde_addr_3 = '(SBE|DBE)|(Number of (\ds) bit fail) \// HB05_SB255'
hlp_s = re.search(tde_addr_3,line).group(1)
if hlp_s:
dict1[pattern,rd_case,address_tde]=identifier+"_"+hlp_s
except AttributeError:pass
try:
tde_addr_3 = '(SBE|DBE)|(Number of (\ds) bit fail) \// HB05_SB255'
hlp_s = re.search(tde_addr_3,line).group(3)
if hlp_s:
dict1[pattern,rd_case,address_tde]=identifier+"_"+hlp_s
except AttributeError: pass
try:
if y0.group(1) and y0.group(3):
dict1[pattern,rd_case,address_tde]=y0.group(1)+y0.group(3)+"_temp"
except AttributeError: pass
#print dict1
#print len(dict1.keys())
#for k,v in sorted(dict1.items()):
#print k,v
def evaluate_lot_wxy(trf_dev_id_pattern):
import re
binary_value = ""
line = trf_dev_id_pattern
dev_id = 'Device ID: ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10})$'
hex_inp1 = re.search(dev_id,line)
#print hex_inp1.group()
hex_inp2 = hex_inp1.group(4)+hex_inp1.group(3)+hex_inp1.group(2)+hex_inp1.group(1)
hex_inp3 = re.sub('0x', '', hex_inp2)
#print hex_inp3
for i,val in enumerate(str(hex_inp3)):
binary_value1=str(bin(int(val,16))[2:]).zfill(4)
binary_value = str(binary_value) + str(binary_value1)
#print binary_value
wafer_val = binary_value[90:96]
wafer = int(wafer_val,2)
y_pos_val = binary_value[106:113]
y_pos = int(y_pos_val,2)
x_pos_val = binary_value[98:105]
x_pos = int(x_pos_val,2)
year_val = binary_value[63:67]
year = int(year_val,2)
production_week_val = binary_value[67:73]
production_week = int(production_week_val,2)
serial_no_val = binary_value[73:83]
serial_no=int(serial_no_val,2)
lot ="ZA"+str(year)+str(production_week)+str(serial_no)
if (1 <= wafer <= 25) and (1<= x_pos <= 65) and (1 <= y_pos <= 65):
dev_id_status = True
return lot,wafer,x_pos,y_pos,dev_id_status
tde_file()
with open(r"C:\Gert_batch file\DOE_parsing\Thebe\DOE 4 - 5K\NEW SFR\PF\1kCycling\Results_452_13384\Result Files\temp\452_20170111_021021_TC1798_MRB_QWL_0c_Digital_PS60c_1021002999.trf", "rt") as f1:
lines = f1.read()
print lines
#for lines in f1.read():
match = re.search(r'Socket:(.*?)Socket:', lines, flags=re.DOTALL)
#print match.group()
for line in match.group().splitlines():
if "Socket:" in line:
trf_addr = 'Socket: (\d+)$'
x0 =re.match(trf_addr, line)
try:
if x0.group(1).zfill(3):
socket_position = x0.group(1).zfill(3)
socket_position_status = True
#print socket_position
except AttributeError: pass
elif "Device ID:" in line:
dev_id = 'Device ID: ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10})$'
x1 = re.search(dev_id,line)
try:
if x1.group(1) and x1.group(1) and x1.group(1) and x1.group(1):
trf_dev_id_pattern = x1.group()#x1.group(4)+x1.group(3)+x1.group(2)+x1.group(1)
lot_wafer_x_y = evaluate_lot_wxy(trf_dev_id_pattern)
dev_id_status = True
#print lot_wafer_x_y
except AttributeError: pass
elif "CB:" in line:
CB_pat = 'CB: (\d+)\/'
x2 = re.search(CB_pat,line)
try:
if x2.group(1):
CB_noS_status = True
#print CB_noS_status
except AttributeError: pass
elif"ART:" in line:
regex = re.search("ART: ([0-9A-Za-z]{10}) DRT: ([0-9A-Za-z]{10})",line)
#print line
try:
if regex.group(1) and regex.group(2):
for key1,val1 in dict1.iteritems():
if regex.group(1) in key1:
#print "Address:"+regex.group(1)
hlp_a = val1
hlp_b = hlp_a.split("_")
identifier = hlp_b[0]
fail_class = hlp_b[1]
key_addtional = hlp_b[2]
val = regex.group(2)
value = int(val[3:],16)
dict2[rd_case,pattern,setup_temp,readout_temp,socket_position,fail_class,identifier,key_addtional]= value
except AttributeError: pass
for k,v in sorted(dict2.items()):
print k,v
Current Output:
Currently the code is print the output for the first match and I would like to get the output for the entire match in the input file.
('Digital', 'QWL', '0C', '0C', '001', 'finish', 'DTS', 'temp') 16
('Digital', 'QWL', '0C', '0C', '001', 'm02', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm02', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm02', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm02', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm03', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm03', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm03', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm03', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm04', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm04', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm04', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm04', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm05', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm05', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm05', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm05', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm06', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm06', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm06', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm06', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm07', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm07', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm07', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm07', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm08', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm08', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm08', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm08', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm09', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm09', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm09', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm09', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm10', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm10', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm10', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm10', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm11', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm11', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm11', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm11', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm12', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm12', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm12', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm12', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm13', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm13', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm13', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm13', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm14', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm14', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm14', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm14', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm15', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm15', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm15', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm15', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'start', 'DTS', 'temp') 14
The above is the output for first match, but I would like to get the output for every match in the file. Could anyone help me in this ? Thanks in advance.
try this:
import re
txt = '''Test_Socket: 1
TestA ID: 0x0B028041 0xCC344007 0x10000834 0x00000011
TestA_CB: 3/ID: 0x445DDC13
TESTA_BD: 0x65C
A:0xB0000190 D:0x310020FF
ART: 0xB0000878 DRT: 0xB0000000
ART: 0xB000087C DRT: 0xB0000000 ... Test_Socket:2'''
match = re.search(r'Test_Socket:(.*?)Test_Socket:', txt, flags=re.DOTALL)
print(match.group(1))
code outputs:
TestA ID: 0x0B028041 0xCC344007 0x10000834 0x00000011
TestA_CB: 3/ID: 0x445DDC13
TESTA_BD: 0x65C
A:0xB0000190 D:0x310020FF
ART: 0xB0000878 DRT: 0xB0000000
ART: 0xB000087C DRT: 0xB0000000 ...
after extracting the lines your can iterate them or run another regex to get what you need from these lines.
for line in match.group(1).splitlines():
...
I found a solution, I have tried re.finditer() in regex and it is working as expected. Please find my code, if there is a better approach than this, plz let me know. I would like thank everyone.
CODE:
import re
from collections import defaultdict
dict2=defaultdict(list)
dict3=defaultdict(list)
dict1=defaultdict(list)
pattern="QWL"
rd_case= "Digital"
setup_temp = "0C"
readout_temp = "0C"
address=[]
def tde_file():
with open(r'C:\Gert_batch file\DOE_parsing\Thebe\DOE 4 - 5K\NEW SFR\PF\tde\MRB_QWL_0c_Digital_PS60c_TC1798.tde', 'rb') as f:
for line in f:
pattern_tde = ":TEST_RESULT"
if pattern_tde in line:
tde_addr = ':TEST_RESULT (\d+); addr: ([0-9A-Za-z]{10})'
x0 =re.match(tde_addr, line)
if x0:
address_tde = x0.group(2)
tde_addr_1 = 'cfp_(vqs|vcs)_m(\d+) \// HB05_SB255'
x1 = re.search(tde_addr_1,line)
tde_addr_2 = 'cfp_(vqs|vcs)_m\dm\d_(vth\d.\d) \// HB05_SB255'
x2 = re.search(tde_addr_2,line)
tde_addrs_1 = '(DTS_)value_(before|after)_test_(start|finish)'
y0 = re.search(tde_addrs_1,line)
if x1:
hlp_s = x1.group(2).zfill(2)
identifier = x1.group(1)+"_m"+hlp_s
if x2:
identifier = x2.group(1)+"_m"+x2.group(2)
try:
tde_addr_3 = '(SBE|DBE)|(Number of (\ds) bit fail) \// HB05_SB255'
hlp_s = re.search(tde_addr_3,line).group(1)
if hlp_s:
dict1[pattern,rd_case,address_tde]=identifier+"_"+hlp_s
except AttributeError:pass
try:
tde_addr_3 = '(SBE|DBE)|(Number of (\ds) bit fail) \// HB05_SB255'
hlp_s = re.search(tde_addr_3,line).group(3)
if hlp_s:
dict1[pattern,rd_case,address_tde]=identifier+"_"+hlp_s
except AttributeError: pass
try:
if y0.group(1) and y0.group(3):
dict1[pattern,rd_case,address_tde]=y0.group(1)+y0.group(3)+"_temp"
except AttributeError: pass
def evaluate_lot_wxy(trf_dev_id_pattern):
import re
binary_value = ""
dev_id_status = False
line = trf_dev_id_pattern
dev_id = 'Device ID: ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10})$'
hex_inp1 = re.search(dev_id,line)
#print hex_inp1.group()
hex_inp2 = hex_inp1.group(4)+hex_inp1.group(3)+hex_inp1.group(2)+hex_inp1.group(1)
hex_inp3 = re.sub('0x', '', hex_inp2)
#print hex_inp3
for i,val in enumerate(str(hex_inp3)):
binary_value1=str(bin(int(val,16))[2:]).zfill(4)
binary_value = str(binary_value) + str(binary_value1)
#print binary_value
wafer_val = binary_value[90:96]
wafer = int(wafer_val,2)
y_pos_val = binary_value[106:113]
y_pos = int(y_pos_val,2)
x_pos_val = binary_value[98:105]
x_pos = int(x_pos_val,2)
year_val = binary_value[63:67]
year = int(year_val,2)
production_week_val = binary_value[67:73]
production_week = int(production_week_val,2)
serial_no_val = binary_value[73:83]
serial_no=int(serial_no_val,2)
lot ="ZA"+str(year)+str(production_week)+str(serial_no)
if (1 <= wafer <= 25) and (1<= x_pos <= 65) and (1 <= y_pos <= 65):
dev_id_status = True
return lot,wafer,x_pos,y_pos,dev_id_status
tde_file()
with open(r"C:\Gert_batch file\DOE_parsing\Thebe\DOE 4 - 5K\NEW SFR\PF\1k Cycling\Results_452_13384\Result Files\452_20170111_021021_TC1798_MRB_QWL_0c_Digital_PS60c_1021002999.trf") as f1:
lines = f1.read()
socket_position_status = False
dev_id_status = False
CB_noS_status = False
trf_val_flag = False
for m in re.finditer(r'Socket:(.*?)ART: 0xB00017EC DRT: 0x00000000\n', lines,flags=re.DOTALL):
x1 = ('%s' % (m.group(0)))
for line in x1.splitlines():
if "Socket:" in line:
trf_addr = 'Socket: (\d+)$'
x0 =re.match(trf_addr, line)
try:
if x0.group(1).zfill(3):
socket_position = x0.group(1).zfill(3)
socket_position_status = True
#print socket_position
except AttributeError: pass
elif "Device ID:" in line:
dev_id = 'Device ID: ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10})$'
x1 = re.search(dev_id,line)
try:
if x1.group(1) and x1.group(1) and x1.group(1) and x1.group(1):
trf_dev_id_pattern = x1.group()#x1.group(4)+x1.group(3)+x1.group(2)+x1.group(1)
lot_wafer_x_y = evaluate_lot_wxy(trf_dev_id_pattern)
dev_id_status = lot_wafer_x_y[4]
#print lot_wafer_x_y
except AttributeError: pass
elif "CB:" in line:
CB_pat = 'CB: (\d+)\/'
x2 = re.search(CB_pat,line)
try:
if x2.group(1):
CB_no = x2.group(1)
CB_noS_status = True
#print CB_noS_status
except AttributeError: pass
elif"ART:" in line:
regex = re.search("ART: ([0-9A-Za-z]{10}) DRT: ([0-9A-Za-z]{10}$)",line)
#print line
try:
if regex.group(1) and regex.group(2):
for key1,val1 in dict1.iteritems():
if regex.group(1) in key1:
#print "Address:"+regex.group(1)
hlp_a = val1
hlp_b = hlp_a.split("_")
identifier = hlp_b[0]
fail_class = hlp_b[1]
key_addtional = hlp_b[2]
val = regex.group(2)
value = int(val[3:],16)
trf_val_flag = True
if dev_id_status and trf_val_flag and CB_noS_status:
dict3[rd_case,pattern,setup_temp,readout_temp,CB_no,socket_position,fail_class,identifier,key_addtional]= value
continue
except AttributeError: pass
#elif socket_position_status and dev_id_status and CB_noS_status and trf_val_flag:
#dict3[rd_case,pattern,setup_temp,readout_temp,socket_position,fail_class,identifier]= value
print len(dict3.keys())
User contributions licensed under CC BY-SA 3.0