def LGM4_verification(input_file, output_file, new_update, table_name): ####################################### #file check variables column_length_counter = 0 check_line_length_list = [] check_blank_entry_list = [] check_column_label_flag = False check_column_labels = [] check_column_label_index = [] missing_column_label = [] #ril matrix variables ril_list = [] ril_matrix = {} ril_rows = 0 ril_tuple_list = [] ril_number_string = "" ril_flag = False ril_tuple_list_location = 0 ril_flag_counter = 0 #post file check variables matrix = {} rows = 0 columns = 141 repeat_marker_counter=0 failure_counter = 0 failure_list = [] warning_list = [] warning_counter = 0 ###################################### #file is checked for critical errors first, if some are found the script halts try: check_file = open(input_file) while True: check_line = check_file.readline() if check_line == '': break if '\n' in check_line: check_line = check_line[:-1] if '\r' in check_line: check_line = check_line[:-1] check_line = check_line.split('\t') check_line_length_list.append(len(check_line)) for i in range (0,len(check_line)): if (check_line[i] == ''): check_blank_entry = column_length_counter +1, i+1 check_blank_entry_list.append(check_blank_entry) if (column_length_counter == 0): check_column_labels = check_line for i in range (0, len(check_line) ): if (check_line[i] in lgm_dictionary_005.column_label_lgm4): continue else: check_column_label_index.append(i) check_column_label_flag = True column_length_counter = column_length_counter + 1 check_file.close() except: print "DID NOT FIND FILE!: " + input_file return #Blank entries were found if (len(check_blank_entry_list) != 0): print "\nBlank entries were found at the following locations\n\n" for i in range (0, len(check_blank_entry_list)): print "Row", repr(check_blank_entry_list[i][0]), ", Column" , lgm_dictionary_005.excel_column_label_dictionary[check_blank_entry_list[i][1]] return #the following code checks that all lines in the file have an equal length. #Matrices handed to the script must be rectangular for i in range (0, len(check_line_length_list)): for j in range (i+1 , len(check_line_length_list)): if (check_line_length_list[i] != check_line_length_list[j]): print "\nThe matrix is not rectangular check the list of lines for discrepancies\n\n" for k in range (0, len(check_line_length_list)): print "line", k+1, "has", check_line_length_list[k], "entries" print "All lines must have the same number of entries" return #The column labels do not match the defined column labels in column_label_lgm4 if (check_column_label_flag == True): print "\nThe following column labels are incorrect\n\n" for i in check_column_label_index: print "Column", lgm_dictionary_005.excel_column_label_dictionary[i+1], "The label is", check_column_labels[i], "it should be", lgm_dictionary_005.column_label_lgm4[i], '\n' return ## #Checks that all columns are in the file ## for i in column_label_lgm4: ## if i in check_column_labels: ## continue ## else: ## check_column_label_flag = True ## missing_column_label.append(i) ## if (check_column_label_flag == True): ## print "\nThe following columns are missing or they are not in the correct order.\n\n" ## for i in range (0,len(missing_column_label)): ## print missing_column_label[i] ## return #Ril matrix #A matrix containing the values of the ril columns is made. This is used later to create a full matrix try: ril_file = open(input_file) while True: ril_line = ril_file.readline() if ril_line == '': break if '' in ril_line: ril_line = ril_line[:-1] if '\r' in ril_line: ril_line = ril_line[:-1] ril_line = ril_line.split('\t') #removes all whitespace characters from the front and rear of the entry for i in range (0, len(ril_line)): ril_line[i].lstrip() ril_line[i].rstrip() if (ril_rows == 0): ril_list = ril_line if (ril_line[0] != '-'): for i in range (11,len(ril_line)): ril_matrix[ril_rows,i] = ril_line[i] else: for j in range(0,len(ril_line)): ril_matrix[ril_rows, j] = "ignored_row" ril_rows = ril_rows + 1 ril_file.close() except: print "DID NOT FIND FILE ril: " + input_file return #Ril Tuple #A data triple is stored in a list this list is used to index the ril matrix and check for values. for i in range(11, len(ril_list)): ril_id = ril_list[i] for j in range (0,len(ril_id)): if ril_id[j].isdigit(): ril_number_string = ril_number_string + ril_id[j] ril_number = int(ril_number_string) ril_number_string = "" ril_tuple = ril_number, ril_id , i ril_tuple_list.append(ril_tuple) #After file is checked for serious errors #The file is read line by line into a 2d matrix try: text_file = open(input_file) while True: line = text_file.readline() if line == '': break if '' in line: line = line[:-1] if '\r' in line: line = line[:-1] line = line.split('\t') #removes all whitespace characters from the front and rear of the entry for i in range (0, len(line)): line[i].lstrip() line[i].rstrip() if (line[0] != '-' ): for i in range ( 0, columns ): if i < 11:#reads data from file matrix[rows,i] = line[i] if ( rows == 0 ): #makes the full set of ril id's if ( i > 10 ): for j in range(1, 131): matrix[rows,j+10] = "RIL" + repr(j) break if ( rows != 0 ): if ( i > 10 ): for j in range(0, len(ril_tuple_list)): if ((i - 10) == ril_tuple_list[j][0]): ril_flag_counter = ril_flag_counter + 1 ril_tuple_list_location = j ril_flag = True if (ril_flag == True): matrix[rows,i] = ril_matrix[rows,ril_tuple_list[ril_tuple_list_location][2]] else: matrix[rows,i] = '-' ril_flag = False else: failure_string = (rows + 1) , "Row " + repr(rows + 1) + " was ignored because it had no marker_id" failure_list.append(failure_string) failure_counter = failure_counter + 1 for j in range(0,columns): matrix[rows, j] = "ignored_row" rows = rows + 1 text_file.close() except: print "DID NOT FIND FILE: " + input_file return #Repeated marker_id's while (repeat_marker_counter != rows): for i in range (repeat_marker_counter + 1, rows): if (matrix[i,0] == "ignored_row"): continue if (matrix[repeat_marker_counter,0] == matrix [i,0]): failure_string = (repeat_marker_counter+ 1) , "Row " + repr(repeat_marker_counter+ 1)+ ", marker_id " + repr(matrix[repeat_marker_counter,0]) + " has been repeated in row " + repr(i+1) failure_list.append(failure_string) failure_counter = failure_counter + 1 repeat_marker_counter= repeat_marker_counter + 1 #Status for i in range (1, rows): if (matrix[i,0] == "ignored_row"): continue if matrix[i,1] in lgm_dictionary_005.status: continue else: failure_string = (i+1) , "Row " + repr(i+1) + ", marker_id " + matrix[i,0] + " has a status which is not recognized." failure_list.append(failure_string) failure_counter = failure_counter + 1 #Sender for i in range (1, rows): if (matrix[i,0] == "ignored_row"): continue if matrix[i,2] in lgm_dictionary_005.sender: continue else: failure_string = (i+1) , "Row " + repr(i+1) + ", marker_id " + matrix[i,0] + " has a sender which is not recognized." failure_list.append(failure_string) failure_counter = failure_counter + 1 #Date_t4 for i in range (1,rows): if (matrix[i,0] == "ignored_row"): continue date = matrix[i,3] for j in range (0, len(date)): if date[j].isdigit(): continue elif date[j] == '/': continue elif date[j] == '\\': matrix[i,3] = string.replace(date,'\\','/') warning_string = (i+1) , "Row " + repr(i+1) + " Date used backslashes, use foward slashes in your dates!" warning_list.append(warning_string) warning_counter = warning_counter + 1 break else: failure_string = (i+1) , "Row " + repr(i+1) + ", marker_id " + matrix[i,0] + " date is not in the correct format, date should be mm/dd/yyyy." failure_list.append(failure_string) failure_counter = failure_counter + 1 break ### REFORMAT DUMMY DATE for i in range (1,rows): if (matrix[i,0] == "ignored_row"): continue date = matrix[i,3] ### REFORMAT INTO MYSQL date = date.split('/') mm = date[0] dd = date[1] yy = date[2] ############ if len(mm) == 1: mm = "0" + mm if len(dd) == 1: dd = "0" + dd matrix[i,3] = yy + "-" + mm + "-" + dd print matrix[i,3] #Cross for i in range(1, rows): if (matrix[i,0] == "ignored_row"): continue if (matrix[i,4] in lgm_dictionary_005.cross): continue else: failure_string = (i+1) , "Row " + repr(i+1) + ", marker_id " + matrix[i,0] + " Cross has a value which is not recognized." failure_list.append(failure_string) failure_counter = failure_counter + 1 #Population for i in range(1, rows): if (matrix[i,0] == "ignored_row"): continue if (matrix[i,5] in lgm_dictionary_005.population): continue else: failure_string = (i+1) , "Row " + repr(i+1) + ", marker_id " + matrix[i,0] + " Population has a value which is not recognized." failure_list.append(failure_string) failure_counter = failure_counter + 1 #Method for i in range(1, rows): if (matrix[i,0] == "ignored_row"): continue if (matrix[i,6] in lgm_dictionary_005.method): if matrix[i,6] == "other": if matrix[i,7]== '-': failure_string = (i+1) , "Row " + repr(i+1) + ", marker_id " + matrix[i,0] + " Other method needs to be specified." failure_list.append(failure_string) failure_counter = failure_counter + 1 continue else: failure_string = (i+1) , "Row " + repr(i+1) + ", marker_id " + matrix[i,0] + " Method has a value which is not recognized." failure_list.append(failure_string) failure_counter = failure_counter + 1 #Ril_values for i in range(1,rows): if (matrix[i,0] == "ignored_row"): continue for j in range(11,141): if matrix[i,j] in lgm_dictionary_005.ril_values: continue else: failure_string = (i+1) , "Row " + repr(i+1) + ", marker_id " + matrix[i,0] + " RIL_id " + matrix[0,j] +" has a value which is not recognized." failure_list.append(failure_string) failure_counter = failure_counter + 1 #Newline if (os.name == 'nt'): _newline = '\r\n' elif (os.name == 'mac'): _newline = '\r' else: _newline = '\n' #Stops file output and outputs failure file if (failure_counter != 0): failure_file = open(output_file + ".failure" , "wbU") failure_list.sort() for i in range (0,len(failure_list)): failure_file.write(failure_list[i][1]) failure_file.write(_newline) print "\nOutput file was not created, check the failure file." failure_file.close() return #Output warning file if (warning_counter !=0): warning_file = open(output_file + ".warning" , "wbU") warning_list.sort() for j in range (0, len(warning_list)): warning_file.write(warning_list[j][1]) warning_file.write(_newline) print "\nWarning file was created, please check for data entry mistakes." warning_file.close() #Create output file file_output = open(output_file + ".clean", "wbU") for i in range (0,rows): for j in range (0,columns): file_output.write(matrix[i,j]) file_output.write('\t') file_output.write(_newline) file_output.close() if new_update == "UPDATE": file_mysql = open(output_file + ".update.table", "wbU") file_mysql.write("table_name" + '\t' + "marker_id" + '\t' + "field" + '\t' + "new_value" + '\n') for i in range (0,rows): for j in range (0,columns): ### CHANGE RIL TO R (DUMMY SOLUTION) ### if j >= 11: matrix[i,j] = re.sub("RIL", "R", matrix[i,j]) ### WRITE DATA TO FILE ### ### i AND j > 0 TO SKIP REPLACEMENT OF MARKER_ID if matrix[i,j] != "X" and i > 0 and j > 0: file_mysql.write(table_name + '\t' + matrix[i,0] + '\t' + matrix[0,j] + '\t' + matrix[i,j] + '\n') file_mysql.close() print "\nprogram exited succesfully" import array import string import os import re import sys import string import lgm_dictionary_005 if __name__ == "__main__": if len(sys.argv) <= 4 or len(sys.argv) > 5: print "Program usage: " print "Input_file, Output_file, NEW/UPDATE, Table_Name" exit else: input_file = sys.argv[1] output_file = sys.argv[2] new_update = sys.argv[3] table_name = sys.argv[4] LGM4_verification(input_file, output_file, new_update, table_name)