本文同步发表于小弟自架网站:微确幸资讯站
import pandas as pd

# Sample course-enrolment records, keyed by column name and then by row index.
# Named ``course_data`` rather than ``dict`` so the builtin is not shadowed.
course_data = {
    '学年': {0: 108, 1: 108, 2: 108, 3: 108, 4: 108, 5: 108, 6: 108, 7: 108, 8: 108, 9: 108},
    '学期': {0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1},
    '开课系所': {0: '法律学系', 1: '通识组', 2: '电机所', 3: '电机所', 4: '电机所', 5: '电机所', 6: '电机所', 7: '地质所', 8: '地质所', 9: '地质所'},
    '课号': {0: 'A0263', 1: 'A0163', 2: 'A0134', 3: 'A0164', 4: 'A0238', 5: 'A0273', 6: 'A0431', 7: 'A0118', 8: 'A0149', 9: 'A0204'},
    '课名': {0: '着作权与传播法规', 1: '民法债编总论', 2: '服务学习二', 3: '亚洲电影与政治', 4: '羽球(一)', 5: '运动专长训练专题研究', 6: '智慧财产权法专题研究(三)', 7: '普通化学丙', 8: '英文能力检定', 9: '导游实务'},
    '学分': {0: 0.5, 1: 3.0, 2: 1.0, 3: 3.0, 4: 3.0, 5: 3.0, 6: 3.0, 7: 0.0, 8: 2.0, 9: 0.0},
    '教师': {0: '余采蓉', 1: '张威德', 2: '周佩芸', 3: '林佳燕', 4: '蔡伟婷', 5: '郭美民', 6: '杨博仁', 7: '黄乐玟', 8: '陈学奇', 9: '林敬岳'},
    '上课时间': {0: '一3一4', 1: '二5四5四6', 2: '二10二11', 3: '二2二3二4', 4: '五2五3五4', 5: '二5二6二7', 6: '二5二6二7', 7: '一3一4', 8: '一5一6', 9: '四1四2'},
    '修课人数': {0: 9, 1: 57, 2: 41, 3: 46, 4: 22, 5: 9, 6: 22, 7: 39, 8: 55, 9: 54},
    '学号': {0: 'K00001', 1: 'K00002', 2: 'K00003', 3: 'K00004', 4: 'K00005', 5: 'K00006', 6: 'K00007', 7: 'K00008', 8: 'K00009', 9: 'K00010'},
    '修课学生部别': {0: '大学部', 1: '大学部', 2: '大学部', 3: '大学部', 4: '大学部', 5: '大学部', 6: '大学部', 7: '大学部', 8: '大学部', 9: '大学部'},
    '修课学生系所': {0: '森林环资系', 1: '通识组', 2: '电机所', 3: '森林环资系', 4: '电机所', 5: '电机所', 6: '电机所', 7: '法律系', 8: '公卫系', 9: '化工系'},
    '学生姓名': {0: '张中铭', 1: '刘俊杰', 2: '许哲玟', 3: '吴文芸', 4: '戴仲原', 5: '张瑞喜', 6: '谢良木', 7: '林嘉甲', 8: '陈丽芬', 9: '沈传花'},
}

# Suppose we want to select the rows whose student name contains any of the
# following keyword characters.
pattern = ['铭', '传', '淡', '江', '逢', '甲', '中', '原', '文', '化']

df = pd.DataFrame(course_data)
print(len(df))
print(df.columns)
# Bare expression: shows the frame when run as the last statement of a
# notebook cell; it has no effect when run as a plain script.
df
def _matched_keywords(name, keywords):
    """Return the distinct characters of ``name`` that appear in ``keywords``.

    Characters are kept in the order they first occur in ``name`` and each
    one is reported at most once. A value that is not a string (for example
    a missing name) yields an empty string instead of raising.
    """
    if not isinstance(name, str):
        return ""
    found = []
    for ch in name:
        # Keep only the first occurrence so a repeated character is not
        # listed twice.
        if ch in keywords and ch not in found:
            found.append(ch)
    return "".join(found)


# For every student name, record which keyword characters it contains.
mylist = [_matched_keywords(name, pattern) for name in df['学生姓名']]
df['关键字'] = mylist
# Bare expression: shows the frame when run as the last statement of a
# notebook cell; it has no effect when run as a plain script.
df
# Drop the rows whose '关键字' column is an empty string (no keyword matched).
# The result is not assigned back, so ``df`` itself is left unchanged; as the
# last expression of a notebook cell the filtered frame is simply displayed.
df[df['关键字'] != '']