I have a text file full of pc data, organized as a list of blocks of one of two types. Either:
*redacted*
My goal was to have Python (3.6.2) open and read the file, clean it up, and compile the data into an excel spreadsheet as follows:
Column 1: PC name
Column 2: Error Type (0 if none, 1-4 for 4 error types)
Column 3: ID (if no error, no braces containing the ID)
Column 4: Password (if no error, just the password)
Here is my code. I use Pycharm, and am in a virtual env:
import xlsxwriter
workbook = xlsxwriter.Workbook('Computer Data.xlsx')
worksheet = workbook.add_worksheet()
bold = workbook.add_format({'bold': True})
left = workbook.add_format({'align': 'justify'})
worksheet.set_column(0, 0, 14)
worksheet.set_column(1, 1, 5)
worksheet.set_column(2, 2, 38)
worksheet.set_column(3, 3, 55)
worksheet.write('A1', 'Name', bold)
worksheet.write('B1', 'Error', bold)
worksheet.write('C1', 'ID', bold)
worksheet.write('D1', 'Password', bold)
def nonblank_lines(f):
for l in f:
line = l.rstrip()
if line:
yield line.lstrip
with open("C:\\Users\\MyName\\Desktop\\BLRP.txt", "r+") as op:
gold_lst = []
nonblank = nonblank_lines(op)
for line in nonblank:
if line.startswith("Computer Name"):
gold_lst.append(str(line))
gold_lst.append("NO ERROR")
elif line.startswith("ID"):
gold_lst.append("IDG: " + str(line))
gold_lst.append('NO ERROR')
elif line.startswith("ERROR: An error occurred while"):
gold_lst.append('1')
gold_lst.append(str('ID: {' + line + '}'))
gold_lst.append(str('Password: '))
elif line.startswith("ERROR: No key"):
gold_lst.append('2')
gold_lst.append(str('ID: {' + line + '}'))
gold_lst.append(str('Password: '))
elif line.startswith("ERROR: An error occurred (code 0x80070057)"):
gold_lst.append('3')
gold_lst.append(str('ID: {' + line + '}'))
gold_lst.append(str('Password: '))
elif line.startswith("ERROR: An error occurred (code 0x8004100e)"):
gold_lst.append('4')
gold_lst.append(str('ID: {' + line + '}'))
gold_lst.append(str('Password: '))
elif line.startswith("Password"):
gold_lst.append(str('Password: ' + next(nonblank)))
print(gold_lst)
op.close()
pc_data = (gold_lst)
row = 1
col = 0
for obj in pc_data:
if obj.startswith("Computer Name"):
worksheet.write_string(row, col, obj[15:])
elif obj.startswith('NO'):
worksheet.write_number(row, col + 1, 0, left)
elif obj.startswith('1'):
worksheet.write_number(row, col + 1, int(obj), left)
elif obj.startswith('2'):
worksheet.write_number(row, col + 1, int(obj), left)
elif obj.startswith('3'):
worksheet.write_number(row, col + 1, int(obj), left)
elif obj.startswith('4'):
worksheet.write_number(row, col + 1, int(obj), left)
elif obj.startswith("ID: {ERROR"):
worksheet.write_string(row, col + 2, '')
elif obj.startswith("IDG: "):
worksheet.write_string(row, col + 2, obj[10:-1])
elif obj.startswith("Password"):
worksheet.write_string(row, col + 3, obj[9:])
row += 1
workbook.close()
Now, this works perfectly for the file in question, but, in addition to the terribly suboptimal code, I'm sure, there is something I can explicitly see that needs improved. In this block:
if line.startswith("Computer Name"):
gold_lst.append(str(line))
gold_lst.append("NO ERROR")
I only want "NO ERROR" to be appended to my list if my line starts with "Computer Name" AND the next non-blank line does not begin with "ERROR." Naturally, I tried this:
if line.startswith("Computer Name"):
if next(nonblank).startswith("ERROR"):
gold_lst.append(str(line))
elif next(nonblank).startswith("VOLUME"):
gold_lst.append(str(line))
gold_lst.append("NO ERROR")
The problem is, this creates a jacked up excel spreadsheet, and I don't at all know why. Even in the step afterward in the main code where I print gold_lst (just to check if the list is correct), the list is terribly inaccurate. I can't even seem to figure out of what the list is comprised.
How can I fix this?
As for a second question, if I may ask it in the same topic, more general text files of this type which I am likely to receive in the future may contain computers with more than one ID and password. The block would look like this, if I had to guess:
*redacted*
And there may be even more than 2 such ID/Password combos. How can I modify my code to allow for this? As it stands, my code will not easily account for this. I am quite new to Python, so maybe it could, but I don't see it.
One approach to this problem is as follows:
groupby()
function to split the list of lines into blocks based on the Computer Name
line.The script is as follows:
from itertools import groupby
import xlsxwriter
import re
workbook = xlsxwriter.Workbook('Computer Data.xlsx')
worksheet = workbook.add_worksheet()
bold = workbook.add_format({'bold': True})
left = workbook.add_format({'align': 'justify'})
cols = [('Name', 14), ('Error', 5), ('ID1', 38), ('Password1', 55), ('ID2', 38), ('Password2', 55), ('ID3', 38), ('Password3', 55)]
for colx, (heading, width) in enumerate(cols):
worksheet.write_string(0, colx, heading, bold)
worksheet.set_column(colx, colx, width)
rowy = 1
lines = []
data = []
computer_name = None
with open('BLRP.txt') as f_input:
lines = [line.strip() for line in f_input if len(line.strip())]
for k, g in groupby(lines, lambda x: x.startswith("Computer Name:")):
if k:
computer_name = re.search(r'Computer Name:\s*(.*)\s*', list(g)[0]).group(1)
elif computer_name:
block = list(g)
error = 'NO ERROR'
ids = []
passwords = []
for line_number, line in enumerate(block):
re_error = re.match('ERROR:\s+"(.*?)"', line)
if re_error:
error = re_error.group(1)
if line.startswith('Numerical Password:'):
ids.append(re.search('\{(.*?)\}', block[line_number+1]).group(1))
passwords.append(block[line_number+3].strip())
worksheet.write_string(rowy, 0, computer_name)
worksheet.write_string(rowy, 1, error)
for index, (id, pw) in enumerate(zip(ids, passwords)):
worksheet.write_string(rowy, index * 2 + 2, id)
worksheet.write_string(rowy, index * 2 + 3, pw)
rowy += 1 # Advance to the next output row
workbook.close()
Assuming your BLRP.txt
is as follows:
Computer Name: "Name Here1"
ERROR: "some type of error"
Blah blah
Blah blah
Blah blah
Computer Name: "Name Here2"
Volume blah blah
Blah Blah
Numerical Password:
ID: {"The ID1 is here; long string of random chars"}
Password:
"Password1 here; also a long string"
Blah Blah
Blah Blah
Numerical Password:
ID: {"The ID2 is here; long string of random chars"}
Password:
"Password2 here; also a long string"
Blah Blah
Blah Blah
Numerical Password:
ID: {"The ID3 is here; long string of random chars"}
Password:
"Password3 here; also a long string"
Blah Blah
Blah Blah
You would get a spreadsheet as follows:
How does groupby()
work?
Normally when you iterate over a list, it gives you the entries one item at a time. With groupby()
, you are able to iterate over this list in "groups", where the number of items in each group is based on a condition. The condition is provided in the form of a function (I have used lambda
to avoid writing a separate function).
groupby()
will build up the group of items to return until the result from the function changes. In this case, the function is looking for lines that start with the word Computer Name
. So when that is true it will return with one item (unless there are two adjacent lines with Computer Name
on them). Next it will return with all the lines that don't start with Computer Name
, and so on.
It returns two things, a key
and a group
. The key
is the result of the function startswith()
, which will either be True
or False
. The group
is an iterable holding all the matching items. list(g)
is used to convert it into a normal list, in this case all the lines until the next Computer Name
line is returned.
To write the entries onto different rows and to convert known error messages into numbers:
from itertools import groupby
import xlsxwriter
import re
workbook = xlsxwriter.Workbook('Computer Data.xlsx')
worksheet = workbook.add_worksheet()
bold = workbook.add_format({'bold': True})
left = workbook.add_format({'align': 'justify'})
cols = [('Name', 14), ('Error', 5), ('ID', 38), ('Password', 55)]
for colx, (heading, width) in enumerate(cols):
worksheet.write_string(0, colx, heading, bold)
worksheet.set_column(colx, colx, width)
rowy = 1
lines = []
data = []
computer_name = None
error_numbers = {
'An error occurred while connecting to the BitLocker management interface.' : 1,
'No key protectors found.' : 2,
'An error occurred (code 0x80070057):' : 3,
'An error occurred (code 0x8004100e):' : 4}
with open('BLRP.txt') as f_input:
lines = [line.strip() for line in f_input if len(line.strip())]
for k, g in groupby(lines, lambda x: x.startswith("Computer Name:")):
block = list(g)
if k:
computer_name = re.search(r'Computer Name:\s*(.*)\s*', block[0]).group(1)
elif computer_name:
error_number = 0 # 0 for NO ERROR
ids = []
passwords = []
for line_number, line in enumerate(block):
re_error = re.match('ERROR:\s+?(.*)\s*?', line)
if re_error:
error = re_error.group(1)
error_number = error_numbers.get(error, -1) # Return -1 for an unknown error
if line.startswith('Numerical Password:'):
ids.append(re.search('\{(.*?)\}', block[line_number+1]).group(1))
passwords.append(block[line_number+3].strip())
worksheet.write_string(rowy, 0, computer_name)
worksheet.write_number(rowy, 1, error_number)
for id, pw in zip(ids, passwords):
worksheet.write_string(rowy, 0, computer_name)
worksheet.write_number(rowy, 1, error_number)
worksheet.write_string(rowy, 2, id)
worksheet.write_string(rowy, 3, pw)
rowy += 1 # Advance to the next output row
if len(ids) == 0:
rowy += 1 # Advance to the next output row
workbook.close()
User contributions licensed under CC BY-SA 3.0