# Colab cell: upload the chat export and store it locally as line.txt.
# `files` comes from google.colab; imported here so this cell runs on its own.
from google.colab import files

# Open the browser upload dialog; the result maps file name -> raw bytes.
uploaded_files = files.upload()

# Take the name of the first uploaded file from the dictionary.
uploaded_file_name = next(iter(uploaded_files))

# Fetch the raw content of that file.
content = uploaded_files[uploaded_file_name]

# Decode the bytes as UTF-8 (optional, depends on how the file was exported).
content = content.decode('utf-8')

# Persist the text as line.txt, which the analysis cells read.
with open('line.txt', 'w', encoding='utf-8') as file:
    file.write(content)

# Show the uploaded file name (for testing).
print(f"档案名称:{uploaded_file_name}")
# =====
# Settings: the two participant names. The script uses them as pie-chart
# labels and keeps them out of the keyword chart.
YOUR_NAME = "XXX"
HER_NAME = "SSSS"
# Install the required packages
!pip install jieba
!pip install cutecharts
import re
# Import packages
import jieba
from datetime import datetime
from cutecharts.charts import Bar, Pie
from cutecharts.components import Page
# Read the document uploaded to Colab (saved as line.txt).
# A context manager is used so the file handle is closed deterministically.
with open('line.txt', 'r', encoding='utf-8') as f:
    content = f.read()
# Tokenize the whole chat text with jieba.
words = jieba.lcut(content)

# Tally how often each token occurs.
# Tokens of length <= 1 and purely numeric tokens are skipped.
counts = {}
for word in words:
    if len(word) <= 1 or word.isdigit():
        continue
    counts[word] = counts.get(word, 0) + 1
# Remove unimportant tokens from the tally.
# NOTE(review): `text` is not used anywhere in the visible script; it is kept
# in case another cell relies on it.
text = ' '.join(words)
excludes = {'\r\n', '下午', '上午', '...'}
for exword in excludes:
    # pop() with a default ignores tokens that never occurred, without the
    # bare `except` that would also hide unrelated errors.
    counts.pop(exword, None)
# Sort the (word, count) pairs by descending count (stable for ties).
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

# Build the keyword bar chart from the most frequent words.
# Media/call placeholders and the two participant names are left out.
skipped_words = {"通话", "照片", "影片", "贴图", YOUR_NAME, HER_NAME}

# NOTE(review): the original loop condition (`len(top_words) <= 10`) collects
# 11 words; that count is preserved here - confirm whether 10 was intended.
TOP_WORD_LIMIT = 11

top_words = []
top_counts = []
# Iterating over `items` (instead of indexing with a manual counter) means a
# chat with fewer eligible words yields a shorter chart, not an IndexError.
for word, count in items:
    if word in skipped_words:
        continue
    top_words.append(word)
    top_counts.append(count)
    if len(top_words) >= TOP_WORD_LIMIT:
        break

chart = Bar("关键字图表")
chart.set_options(labels=top_words, x_label="单词", y_label="出现次数")
chart.add_series("次数", top_counts)
# Pie chart of how often the tokens for photo / video / call occur.
media_labels = ['照片', '影片', '通话']
chart2 = Pie("通话/影片/照片数统计")
chart2.set_options(labels=media_labels)
chart2.add_series([counts.get("照片", 0), counts.get("影片", 0), counts.get("通话", 0)])

# Pie chart of message volume per participant.
# The value for each person is the number of times their name token was
# counted in `counts` (0 if the name never appeared as a token).
chart3 = Pie("传送讯息量")
chart3.set_options(labels=[YOUR_NAME, HER_NAME], inner_radius=0)
chart3.add_series([counts.get(YOUR_NAME, 0), counts.get(HER_NAME, 0)])
# Regular expression for date header lines: 10 characters followed by a
# parenthesised weekday at the end of the line; group 1 is the weekday.
# NOTE(review): the parentheses were unescaped in the original
# (`((\w+))`), which cannot match a literal "(...)"; they are restored here
# on the assumption that date lines look like `2023/05/01(一)` - confirm
# against a real export.
pattern = r"(?m)^.{10}\((\w+)\)(?=\n)"

# Dictionary holding the number of date lines per weekday.
weekdays_counts = {}

# Read the file content (same encoding as used when the file was written).
with open("line.txt", "r", encoding="utf-8") as f:
    content = f.read()

# Find every date line that matches the pattern.
matches = re.finditer(pattern, content)

# English weekday names are normalised to their Chinese labels; any other
# captured value (including the Chinese labels themselves) is kept unchanged.
# The original chain mapped "Monday" to "二" and had no Tuesday branch.
weekday_labels = {
    "Monday": "一",
    "Tuesday": "二",
    "Wednesday": "三",
    "Thursday": "四",
    "Friday": "五",
    "Saturday": "六",
    "Sunday": "日",
}

# Count the occurrences per weekday.
for match in matches:
    weekday = match.group(1)
    weekday = weekday_labels.get(weekday, weekday)
    weekdays_counts[weekday] = weekdays_counts.get(weekday, 0) + 1

# Print the per-weekday counts.
for weekday, count in weekdays_counts.items():
    print(f"{weekday}: {count}")
# Bar chart of the per-weekday counts; bars appear in the order in which the
# weekdays were first inserted into `weekdays_counts`.
chart4 = Bar("星期资料统计")
chart4.set_options(labels=list(weekdays_counts.keys()), x_label="星期", y_label="次数")
chart4.add_series("次数", list(weekdays_counts.values()))
# Combine all four charts into a single page, in creation order.
page = Page()
for page_chart in (chart, chart2, chart3, chart4):
    page.add(page_chart)

# Save the page as an HTML file.
html_file_path = "charts.html"
page.render(html_file_path)
# Download the rendered HTML file from the Colab runtime.
from google.colab import files
files.download("charts.html")