With some help from here, I have this working almost exactly the way I want. Now I need to be able to remove data from a file before the files are compared.
The reason for this is that the strings ("data") that I'm removing are known to differ each time the file is saved.
I have written a regex to select the exact text that I want to remove, but I am having trouble implementing it with my current code.
Here are the three main functions
HOSTNAME_RE = re.compile(r'hostname +(\S+)')


def get_file_info_from_lines(filename, file_lines):
    """Hash a config file's lines and extract its hostname.

    Args:
        filename: Path of the file the lines came from; returned unchanged.
        file_lines: Iterable of text lines.

    Returns:
        A ``(hostname, filename, sha1_hexdigest)`` tuple. ``hostname`` is
        ``None`` when no line starts with ``hostname <name>``; when several
        lines do, the last one wins.
    """
    hostname = None
    a_hash = hashlib.sha1()
    for line in file_lines:
        a_hash.update(line.encode('utf-8'))
        match = HOSTNAME_RE.match(line)
        if match:
            hostname = match.group(1)
    return hostname, filename, a_hash.hexdigest()


def get_file_info(filename):
    """Return the info tuple for a config file, or None for other files.

    Only names ending in ``.cfg``, ``.startup`` or ``.confg`` are read.
    """
    if not filename.endswith(('.cfg', '.startup', '.confg')):
        return None
    # Open read-only: the file is never written, and "r+" fails on files
    # the user may read but not modify.
    with open(filename, "r") as in_file:
        # NOTE: re.sub(REMOVE_RE, subst, filename, 0, re.MULTILINE) does not
        # belong here -- it would rewrite the path string, not the contents.
        return get_file_info_from_lines(filename, in_file.readlines())


def hostname_parse(directory):
    """Map each hostname found in `directory` to its file's info tuple.

    Entries for which get_file_info() returns None are skipped. Two files
    with the same hostname overwrite each other; the later listing wins.
    """
    results = {}
    # List the directory once so the progress total matches the loop.
    entries = os.listdir(directory)
    total = len(entries)
    for i, entry in enumerate(entries, start=1):
        filename = os.path.join(directory, entry)
        # sleep() and progress_bar() are defined outside this snippet.
        sleep(0.001)
        progress_bar(i, total, prefix='Progress:', suffix='Complete',
                     barLength=50)
        info = get_file_info(filename)
        if info is not None:
            results[info[0]] = info
    return results
This is the regex for finding the strings to be removed.
# Matches the volatile header of a saved config: the rest of the line that
# starts at "Current configuration" plus up to five following lines.
# Kept as a plain string (not compiled) so it can be passed to re.sub()
# together with a flags argument.
REMOVE_RE = r"((?:\bCurrent configuration)(?:.*\n?){6})"
# Replacement text for re.sub(): the matched header is dropped entirely.
subst = ""
EXAMPLE_FILE_BEFORE_DATA_REMOVED:
Building configuration... Current configuration : 45617 bytes ! ! Last configuration change at 00:22:36 UTC Sun Jan 22 2017 by user ! NVRAM config last updated at 00:22:43 UTC Sun Jan 22 2017 by user ! version 15.0 no service pad ! no logging console enable secret 5 ***encrypted password*** ! username admin privilege 15 password 7 ***encrypted password*** username sadmin privilege 15 secret 5 ***encrypted password*** aaa new-model ! ip ftp username ***encrypted password*** ip ftp password 7 ***encrypted password*** ip ssh version 2 ! line con 0 password 7 ***encrypted password*** login authentication maint line vty 0 4 password 7 ***encrypted password*** length 0 transport input ssh line vty 5 15 password 7 ***encrypted password*** transport input ssh !
EXAMPLE_FILE_AFTER_DATA_REMOVED:
Building configuration... ! no service pad ! no logging console enable ! username admin privilege 15 username gisadmin privilege 15 aaa new-model ! ip ftp username cfgftp ip ftp ip ssh version 2 ! line con 0 login authentication maint line vty 0 4 length 0 transport input ssh line vty 5 15 transport input ssh !
I've tried doing something like #filename = re.sub(REMOVE_RE, subst, filename, 0, re.MULTILINE) within the get_file_info and get_file_info_from_lines but I'm obviously not implementing it correctly.
Any help would be appreciated as I am just learning.
Running the Compare:
# Build one {hostname: (hostname, filename, sha1)} mapping per location.
results1 = hostname_parse('test1.txt')
results2 = hostname_parse('test2.txt')

# Report every hostname present in both mappings whose hashes differ.
for hostname, filename, filehash in results1.values():
    if hostname not in results2:
        continue
    _, filename2, filehash2 = results2[hostname]
    if filehash == filehash2:
        continue
    print("%s has a change (%s, %s)" % (hostname, filehash, filehash2))
    print(filename, filename2, sep="\n")
    print()
I do not want to modify the current file. If all of this could be done in memory or with a temporary file, that would be great.
FULL CODE:
import hashlib
import os
import re

HOSTNAME_RE = re.compile(r'hostname +(\S+)')
# Volatile header: the rest of the "Current configuration" line plus up to
# five following lines (byte count, change/NVRAM timestamps, version).
REMOVE_RE = re.compile(r"((?:\bCurrent configuration)(?:.*\n?){6})")


def get_file_info_from_lines(filename, file_lines):
    """Hash a config's lines, ignoring the volatile header, and find its hostname.

    Args:
        filename: Path the lines came from; returned unchanged.
        file_lines: Iterable of text lines that still carry their line
            endings (as produced by ``readlines()``).

    Returns:
        A ``(hostname, filename, sha1_hexdigest)`` tuple. ``hostname`` is
        ``None`` when no line starts with ``hostname <name>``; when several
        lines do, the last one wins.
    """
    lines = list(file_lines)

    hostname = None
    for line in lines:
        match = HOSTNAME_RE.match(line)
        if match:
            hostname = match.group(1)

    # REMOVE_RE spans several lines, so it must run against the whole text.
    # Matching it one line at a time only ever drops the first header line
    # and leaves the timestamps in the hash.
    stable_text = REMOVE_RE.sub('', ''.join(lines))
    digest = hashlib.sha1(stable_text.encode('utf-8')).hexdigest()
    return hostname, filename, digest


def get_file_info(filename):
    """Return the info tuple for a config file, or None for other files.

    Only names ending in ``.cfg``, ``.startup`` or ``.confg`` are read.
    """
    if not filename.endswith(('.cfg', '.startup', '.confg')):
        return None
    # Read-only: nothing is written back, so the original file is untouched
    # and read-only files can be processed too.
    with open(filename, "r") as in_file:
        return get_file_info_from_lines(filename, in_file.readlines())


def hostname_parse(directory):
    """Map each hostname found in `directory` to its file's info tuple."""
    results = {}
    for entry in os.listdir(directory):
        info = get_file_info(os.path.join(directory, entry))
        if info is not None:
            results[info[0]] = info
    return results


if __name__ == "__main__":
    results1 = hostname_parse('test1')  # Directory of test files
    results2 = hostname_parse('test2')  # Directory of test files 2

    for hostname, filename, filehash in results1.values():
        if hostname in results2:
            _, filename2, filehash2 = results2[hostname]
            if filehash != filehash2:
                print("%s has a change (%s, %s)" % (
                    hostname, filehash, filehash2))
                print(filename)
                print(filename2)
                print()
2 Answers
Answers 1
In get_file_info_from_lines, simply ignore the line if it matches your regular expression. This way you don't need to actually modify the file or create another file; you simply calculate the hash with the lines that actually matter.
# REMOVE_RE spans six lines, so apply it to the whole text in memory.
# Testing it against one line at a time would only ever drop the
# "Current configuration" line and leave the timestamp lines in the hash.
cleaned = re.sub(REMOVE_RE, '', ''.join(file_lines))
a_hash.update(cleaned.encode('utf-8'))
Answers 2
Hi, I suggest the following approach: use a function to clean each line, then process the lines to remove the empty ones.
Then use difflib to compare. Use python -m doctest file.py to check the doctests.
import re

# Sample input: a saved config including the volatile header and secrets.
source_content = """
Building configuration...
Current configuration : 45617 bytes
!
! Last configuration change at 00:22:36 UTC Sun Jan 22 2017 by user
! NVRAM config last updated at 00:22:43 UTC Sun Jan 22 2017 by user
!
version 15.0
no service pad
!
no logging console
enable secret 5 ***encrypted password***
!
username admin privilege 15 password 7 ***encrypted password***
username sadmin privilege 15 secret 5 ***encrypted password***
aaa new-model
!
ip ftp username ***encrypted password***
ip ftp password 7 ***encrypted password***
ip ssh version 2
!
line con 0
password 7 ***encrypted password***
login authentication maint
line vty 0 4
password 7 ***encrypted password***
length 0
transport input ssh
line vty 5 15
password 7 ***encrypted password***
transport input ssh
!
"""

# Sample of the desired result after the volatile data has been removed.
target_content = """
Building configuration...
!
no service pad
!
no logging console
enable
!
username admin privilege 15
username gisadmin privilege 15
aaa new-model
!
ip ftp username cfgftp
ip ftp
ip ssh version 2
!
line con 0
login authentication maint
line vty 0 4
length 0
transport input ssh
line vty 5 15
transport input ssh
!
"""

HOSTNAME_RE = re.compile(r'hostname +(\S+)')
REMOVE_RE = re.compile(r"((?:\bCurrent configuration)(?:.*\n?){6})")


def process_line(line):
    """Normalise one config line.

    Lines starting with '!' collapse to a bare '!', a ``hostname <name>``
    line is reduced to the name, a line starting with "Current
    configuration" becomes empty, and anything else is returned unchanged.

    >>> process_line('! rgrg')
    '!'
    >>> process_line('hostname router1')
    'router1'
    >>> process_line('Current configuration : 45617 bytes')
    ''
    >>> process_line('username admin privilege 15 password 7 ***encrypted password***')
    'username admin privilege 15 password 7 ***encrypted password***'
    """
    if line.startswith('!'):
        return '!'
    # Keep the match object: the hostname is read from its first group.
    match = HOSTNAME_RE.match(line)
    if match:
        return match.group(1)
    if REMOVE_RE.match(line):
        return ''
    return line


# debug
for line in source_content.split('\n'):
    print(repr(process_line(line).strip()))

whitened = '\n'.join(process_line(line).strip()
                     for line in source_content.split('\n'))


def clean_lines(lines, flag=''):
    """Replace each run of consecutive 'flag' lines by a single one.

    A line counts as a flag line when it equals `flag` after its trailing
    newlines are stripped; the default flag matches empty lines.

    >>> clean_lines(['!', '!', 'a', '!'], flag='!')
    ['!', 'a', '!']
    >>> clean_lines(['a', '', '', 'b'])
    ['a', '', 'b']
    """
    res = []
    in_block = False
    for line in lines:
        if line.strip('\n') == flag:
            # Keep only the first line of a run of flag lines.
            if not in_block:
                res.append(line)
                in_block = True
            continue
        in_block = False
        res.append(line)
    return res


print('^^^^^^^^^^^^^^')
no_exc = '\n'.join(clean_lines(whitened.split('\n'), flag='!'))
print(no_exc)
print('##############')
no_sp = '\n'.join(clean_lines(no_exc.split('\n')))
print(no_sp)
0 comments:
Post a Comment