Update for clarity: I'm trying to append the value of the first match of a file name to a csv file. Specifically, I would like to append the first fname whose file_label2 matched, i.e. the match that was used to apply the found value to the Suggested Label row. This information is retrieved from GitHub using github3.py.
In the code I have below, I do not receive an error, but I don't think it's the right way to accomplish getting the first file name match.
Sample output returned from GitHub:
PR Number: 123
Login: dbs
Files:
files/file-folder/media/figure01
file_label2 = figure01
files/file-folder/jsfile-to-checkin
file_label2 = jsfile
Suggested Label: Value1
PR Number: 567
Login: dba
Files:
files/file-folder/media/figure01
file_label2 = figure01
files/file-folder/csfile-to-checkin
file_label2 = csfile
Suggested Label: Value2
Desired csv output:
PR Number, Login, First File Found, Suggested Label
123,dbs,files/file-folder/jsfile-to-checkin, Value1
567,dba,files/file-folder/csfile-to-checkin, Value2
List used to match fname prefix after file split:
# File-name prefixes that map to the suggested label "Value1".
list1 = ["jsfile", "csfile"]
# File-name prefixes that map to the suggested label "Value2".
list2 = ["css", "html"]
Code:
# ---------------------------------------------------------------------------
# Step 1: write a plain-text report (inputFile) with one record per pull
# request that does not already carry one of the triage tags below.
# `repo`, `inputFile`, `outputFile`, `list1` and `list2` are defined outside
# this snippet.
# ---------------------------------------------------------------------------
tags = ["Bug", "Blocked", "Investigate"]

with open(inputFile, 'w') as f:
    for prs in repo.pull_requests():
        getlabels = repo.issue(prs.number).as_dict()
        labels = [entry['name'] for entry in getlabels['labels']]
        enterprisetag = [tagsvalue for tagsvalue in labels if tagsvalue in tags]
        found = "No file match"
        if enterprisetag:
            # Already triaged: nothing is written for this pull request.
            continue

        f.write("PR Number: %s\nLogin: %s\nFiles: \n"
                % (getlabels['number'], getlabels['user']['login']))
        for data in repo.pull_request(prs.number).files():
            fname, extname = os.path.splitext(data.filename)
            f.write(fname + '\n')
            # Last path component, then its leading dash-separated part(s).
            file_label = fname.rsplit('/', 1)[-1]
            if file_label.count("-") == 1:
                file_label2 = file_label.split("-")[0]
            else:
                file_label2 = "-".join(file_label.split("-", 2)[:2])
            f.write("file_label2: %s" % file_label2 + '\n')
            # Stop at the first file whose label matches one of the lists.
            if [emlabel for emlabel in list1 if emlabel in file_label2]:
                found = "Value1"
                break
            elif [mk_label for mk_label in list2 if mk_label in file_label2]:
                found = "Value2"
                break
            else:
                found = (str(None))
        f.write("Suggested Label: %s" % found + '\n')

# ---------------------------------------------------------------------------
# Step 2: read the report back and emit one CSV row per record that has a
# PR number, a login, a first matching file and a suggested label.
# ---------------------------------------------------------------------------
prNum, login, firstFileFound, label = None, None, None, None
prevLine = None  # most recent non-matching line seen inside a "Files:" section
multiLineFlag = False  # True while scanning the lines that follow "Files:"
# The prefix lists are defined elsewhere; concatenated once instead of per line.
fileTypes = (enterprise_mobility + marketplace + modern_apps + pnp
             + tdc + tdc_abr + unlock_insights)

with open(outputFile, 'w') as w, open(inputFile) as report:
    w.write("PR Number, Login, First File Found, Suggested Label\n")
    for line in report:
        line = line.strip()

        if line.startswith('PR Number: '):
            # A new record starts here. Drop anything left over from a record
            # that never matched a file; otherwise its PR number and login
            # would be paired with the next record's file.
            prNum, login, firstFileFound, label = None, None, None, None
            prevLine = None
            multiLineFlag = False

        if multiLineFlag and not firstFileFound:
            if line.startswith('file_label') and any(fileType in line for fileType in fileTypes):
                # The file path is the line written just before its label.
                firstFileFound = prevLine
                multiLineFlag = False
            else:
                prevLine = line

        if not multiLineFlag:
            if line.startswith('PR Number: '):
                prNum = line[len('PR Number: '):]
            elif line.startswith('Login: '):
                login = line[len('Login: '):]
            elif line.startswith('Suggested Label: '):
                label = line[len('Suggested Label: '):]
            elif line.startswith('Files:'):
                multiLineFlag = True

        if all([prNum, login, firstFileFound, label]):
            w.write("%s,%s,%s,%s\n" % (prNum, login, firstFileFound, label))
            prNum, login, firstFileFound, label = None, None, None, None
3 Answers
Answers 1
The general idea is that, whether a record's data is spread over multiple lines or sits on a single line, you scan for the individual properties. Once they have all been found, you start over with the next record.
# Convert the plain-text PR report into a CSV with one row per record that has
# a PR number, a login, a first matching file and a suggested label.
prNum, login, firstFileFound, label = None, None, None, None
prevLine = None  # most recent non-matching line seen inside a "Files:" section
multiLineFlag = False  # True while scanning the lines that follow "Files:"
list1 = ["jsfile", "csfile"]
inputFile = ''  # Provide your input filename here
outputFile = ''  # Provide your output filename here

with open(outputFile, 'w') as w, open(inputFile) as report:
    w.write("PR Number, Login, First File Found, Suggested Label\n")
    for line in report:
        line = line.strip()

        if line.startswith('PR Number: '):
            # A new record starts here. Drop anything left over from a record
            # that never matched a file; otherwise its PR number and login
            # would be paired with the next record's file.
            prNum, login, firstFileFound, label = None, None, None, None
            prevLine = None
            multiLineFlag = False

        if multiLineFlag and not firstFileFound:
            if line.startswith('file_label') and any(fileType in line for fileType in list1):
                # The file path is the line written just before its label.
                firstFileFound = prevLine
                multiLineFlag = False
            else:
                prevLine = line

        if not multiLineFlag:
            if line.startswith('PR Number: '):
                prNum = line[len('PR Number: '):]
            elif line.startswith('Login: '):
                login = line[len('Login: '):]
            elif line.startswith('Suggested Label: '):
                label = line[len('Suggested Label: '):]
            elif line.startswith('Files:'):
                multiLineFlag = True

        if all([prNum, login, firstFileFound, label]):
            w.write("%s,%s,%s,%s\n" % (prNum, login, firstFileFound, label))
            prNum, login, firstFileFound, label = None, None, None, None
The question could be clearer, but as I understand it, you need to parse a custom file format, which is not too difficult. The following will work if my assumptions about your data are correct.
So, for an input file that looks like:
PR Number: 123
Login: dbs
Files:
files/file-folder/media/figure01
file_label2 = figure01
files/file-folder/jsfile-to-checkin
file_label2 = jsfile
Suggested Label: Value1
PR Number: 567
Login: dba
Files:
files/file-folder/media/figure01
file_label2 = figure01
files/file-folder/csfile-to-checkin
file_label2 = csfile
Suggested Label: Value2
this will return:
PR Number, Login, First File Found, Suggested Label
123,dbs,files/file-folder/jsfile-to-checkin, Value1
567,dba,files/file-folder/csfile-to-checkin, Value2
Adjustments may be necessary to cover for edge conditions but I took a shot at the question after I saw your bounty.
Answers 2
You didn't mention what error your script produced. I noticed two possible errors in the code that you posted:
1
Inside the for loop for data in repo.pull_request(prs.number).files():
if [emlabel for emlabel in list1 if emlabel in file_label2]: found = "Value1"
Here file_label2 should be a string, and emlabel is also a string, so I think what you need is a '==' here:
if [emlabel for emlabel in list1 if emlabel == file_label2]:
2
When you try to append the filename:
str_to_list = [x.split(" ") for x in fname.split(" ")] row.append(str_to_list[0])
Here you probably get a nested list, str_to_list=[['your/file/name']]. Is that what you expect?
One more thing you didn't explain in your code is the parameter repo. Where does it come from? Is it something you get from other scripts, or do you need to parse a text file to get it?
Please explain your problem in a more concise and clearer way so that people can really help.
Answers 3
I think this does most of what you need. I have made a few assumptions, such as that pull_request(number).files() is the same as pr.files() from the outer loop. I have also removed some computations that I don't think were doing anything (for instance, splitting on ' ' each member of the return value of splitting on ' ').
#!python3
"""Suggest a label for each pull request from its first matching file name.

For every pull request that does not carry one of ``SKIP_TAGS``, the script
looks for the first changed file whose name tag contains a key of
``FILENAME_LABELS`` and writes the PR number, login, that file and the mapped
label to ``INFO_LABELS``.
"""
import csv
import os.path


class C:
    """Offline stand-in for the objects the script reads from ``repo``.

    A single class plays the repository, the pull request, the issue and the
    changed file, so the script can run without network access.
    """

    @property
    def number(self):
        return '12345'

    def as_dict(self):
        # Same shape the question's code reads: labels[i]['name'] and
        # user['login'].
        return {
            'labels': [{'name': 'Foo'}],
            'user': {'login': 'xyzzy'},
        }

    @property
    def filename(self):
        return 'path/to/jsfile-to-checkin.js'

    def files(self):
        return [C()]

    def issue(self, num):
        return C()

    def pull_requests(self):
        return [C()]


repo = C()

INFO = 'info.csv'
INFO_LABELS = 'info-with-labels.csv'
SKIP_TAGS = {"Bug", "Blocked", "Investigate"}
FILENAME_LABELS = {
    'csfile': 'Value1',
    'jsfile': 'Value1',
    'css': 'Value2',
    'html': 'Value2',
}


def file_tag(path):
    """Return the tag of *path*: the leading dash-separated part(s) of its base name.

    The directory and extension are dropped. Names with fewer than three
    dash-separated parts yield the first part; longer names yield the first
    two parts joined by ``-``.
    """
    filename = path.rsplit('/', 1)[-1]
    basename, _ext = os.path.splitext(filename)
    name_parts = basename.split('-')
    if len(name_parts) < 3:
        return name_parts[0]
    return '-'.join(name_parts[:2])


def suggest_label(path):
    """Return the label of the first ``FILENAME_LABELS`` key found in the tag of *path*.

    Returns an empty string when no key occurs in the tag.
    """
    tag = file_tag(path)
    for text, label in FILENAME_LABELS.items():
        if text in tag:
            return label
    return ''


def first_match(paths):
    """Return ``(path, label)`` for the first path that gets a label.

    Returns ``("No file match", '')`` when none of *paths* matches.
    """
    for path in paths:
        label = suggest_label(path)
        if label:
            return path, label
    return "No file match", ''


def main():
    """Write the header to ``INFO`` and one labelled row per PR to ``INFO_LABELS``."""
    # csv writers expect files opened with newline='' so that the writer alone
    # controls the line endings.
    with open(INFO, 'w+', newline='') as info_file, \
            open(INFO_LABELS, 'w', newline='') as info_labels_file:
        info = csv.writer(info_file)
        info_labels = csv.writer(info_labels_file, lineterminator='\n')

        headers = 'PR Number|Login|First file found'
        # NOTE(review): only this header row is ever written to INFO; confirm
        # whether data rows were intended there as well.
        info.writerow(headers.split('|'))
        label_headers = headers + '|Suggested Labels'
        info_labels.writerow(label_headers.split('|'))

        for pr in repo.pull_requests():
            pr_issue = repo.issue(pr.number).as_dict()
            labels = [entry['name'] for entry in pr_issue['labels']]
            if any(tag in SKIP_TAGS for tag in labels):
                continue

            first_file, use_label = first_match(
                pr_file.filename for pr_file in pr.files()
            )
            row = [pr.number, pr_issue['user']['login'], first_file, use_label]
            info_labels.writerow(row)


if __name__ == '__main__':
    main()
0 comments:
Post a Comment