# Walk the frame row by row; each step gives the row's index label and the row itself.
for row_label, record in df.iterrows():
    print(row_label)  # the index value of the current row
    print(record['模块'])
按行遍历(使用 apply)
1 2 3 4 5 6 7 8 9
def row_global_parameter(df_row):
    """Print the '模块' value of one row unless its '可用' field equals '否'.

    Args:
        df_row: a single row, as handed over by ``DataFrame.apply(..., axis=1)``.
    """
    if df_row['可用'] == '否':
        return
    # Visit every column of the row; `value` is the cell in column `label`.
    for label, value in df_row.items():
        if label == '模块':
            print(value)


# Process the frame row by row, passing each row to row_global_parameter.
df.apply(row_global_parameter, axis=1)
特定几列遍历
1 2 3 4 5 6
modules = df['模块']
names = df['名字']

# Iterate the two selected columns in lockstep.
# The first argument must be `modules` (the column), not the loop variable
# `module`, which is not yet defined when zip() is called.
for module, name in zip(modules, names):
    print(module)
    print(name)
def update_sheet(xlsx_file_path, update_sheet_name, df):
    """Write ``df`` into one sheet of an existing workbook, keeping all other sheets.

    The workbook is opened in append mode so the sheets already in the file
    are preserved. If ``update_sheet_name`` already exists, the new data is
    written over its cells starting at the top-left corner (cells outside the
    written range are left as they were); otherwise the sheet is created.

    Args:
        xlsx_file_path: path of the existing ``.xlsx`` file to modify in place.
        update_sheet_name: name of the sheet to write.
        df: the DataFrame to write; its index is not written.

    Note:
        ``if_sheet_exists='overlay'`` needs pandas >= 1.4. It replaces the
        older pattern of assigning ``writer.book`` / ``writer.sheets`` and
        calling ``writer.save()``, which no longer works on pandas 2.x.
    """
    # The context manager saves and closes the file even if to_excel raises.
    with pd.ExcelWriter(
        xlsx_file_path,
        engine='openpyxl',
        mode='a',
        if_sheet_exists='overlay',
    ) as writer:
        df.to_excel(writer, sheet_name=update_sheet_name, index=False)
根据 df_a 删除 df_b 中对应的行
1 2 3 4
for _, row_a in df_a.iterrows():
    if row_a['模块'] != 'a':
        continue
    # Drop the df_b rows whose '模块' column holds this row's '模块' value.
    matches_row = df_b['模块'].isin([row_a['模块']])
    df_b = df_b[~matches_row]
# Collect the rows of new_df that share the ('a', 'b') key with a row of
# outdated_df but differ from it in some column other than 'ID'.
diff_changed_df = pd.DataFrame(columns=diff_removed_df.columns)
changed_rows = []
for _, outdated_row in outdated_df.iterrows():
    for _, new_row in new_df.iterrows():
        same_key = (
            outdated_row['a'] == new_row['a']
            and outdated_row['b'] == new_row['b']
        )
        # Series.equals: the data types of the two rows must be the same
        # before comparison, otherwise equal-looking values compare unequal.
        if same_key and not outdated_row.drop('ID').equals(new_row.drop('ID')):
            changed_rows.append(new_row.values)

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE(review): reset_index() without drop=True keeps the old index as an
# extra 'index' column -- confirm that downstream code expects it.
diff_changed_df = pd.concat(
    [
        diff_changed_df,
        pd.DataFrame(changed_rows, columns=diff_changed_df.columns),
    ]
).reset_index()
判断两个excel是否存在重复行
这里判定一行数据是否唯一的标准是 a 列和 b 列的组合
1 2 3 4
def check_multiple_name_directory(df):
    """Return True when no two rows of ``df`` share the same ('a', 'b') pair.

    Rows count as duplicates only when columns 'a' and 'b' are both equal.
    (The original note described these as the use-case name and use-case
    directory columns -- presumably 'a' and 'b' stand in for them.)

    Args:
        df: DataFrame containing at least the columns 'a' and 'b'.

    Returns:
        True if the table has no duplicated ('a', 'b') pair, False otherwise.
    """
    # keep=False flags every member of a duplicate group, not just the repeats.
    duplicated_rows = df[df.duplicated(subset=['a', 'b'], keep=False)]
    return duplicated_rows.empty