使用 Python 分析 LINE 通话记录

在 Colab 中需要先上传档案

# Upload a file through Colab's upload helper.
from google.colab import files

uploaded_files = files.upload()

# Take the name of the first entry in the returned mapping.
uploaded_file_name = list(uploaded_files.keys())[0]

# Content stored under that name.
content = uploaded_files[uploaded_file_name]

# Decode the content as UTF-8 (optional; depends on the uploaded file).
content = content.decode('utf-8')

# Save the text as line.txt.
with open('line.txt', 'w', encoding='utf-8') as file:
    file.write(content)

# Print the uploaded file name (for testing).
print(f"档案名称:{uploaded_file_name}")

=====

# Settings: the two names used by this script.
YOUR_NAME = "XXX"
HER_NAME = "SSSS"

安装需要的套件

!pip install jieba
!pip install cutecharts

import re

汇入套件

import jieba
from datetime import datetime
from cutecharts.charts import Bar, Pie
from cutecharts.components import Page

读取Colab上传的文档

# Read line.txt as UTF-8 text; the context manager closes the file handle
# as soon as the read finishes.
with open('line.txt', 'r', encoding='utf-8') as log_file:
    content = log_file.read()

使用jieba进行分词

# Segment the whole text into a list of tokens with jieba.
words = jieba.lcut(content)
counts = {}

# Tally how often each token occurs, skipping tokens of length <= 1 and
# tokens made up only of digits.
for word in words:
    if len(word) <= 1 or word.isdigit():
        continue
    counts[word] = counts.get(word, 0) + 1

删除不重要的词语

# Space-joined form of the segmented tokens.
text = ' '.join(words)

# Tokens to drop from the tally before ranking.
excludes = {'\r\n', '下午', '上午', '...'}
for exword in excludes:
    # pop() with a default removes the entry when present and is a no-op
    # otherwise, so no exception handling is needed.
    counts.pop(exword, None)

排序

# (word, count) pairs ordered from the highest count to the lowest.
items = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)

绘制关键字图表

# Tokens that are never shown in the keyword chart.
skipped_words = {"通话", "照片", "影片", "贴图", YOUR_NAME, HER_NAME}

top_words = []
top_counts = []
# Walk the ranked pairs directly so the loop simply ends when there are
# fewer eligible words than the limit, instead of indexing past the end.
for word, count in items:
    # NOTE(review): this limit yields 11 entries, not 10 — confirm whether
    # a top-10 was intended.
    if len(top_words) > 10:
        break
    if word in skipped_words:
        continue
    top_words.append(word)
    top_counts.append(count)

# Bar chart of the collected words and their counts.
chart = Bar("关键字图表")
chart.set_options(labels=top_words, x_label="单词", y_label="出现次数")
chart.add_series("次数", top_counts)

绘制通话/影片/照片数统计图表

# Pie chart of how often the photo / video / call tokens were counted.
media_labels = ['照片', '影片', '通话']
chart2 = Pie("通话/影片/照片数统计")
chart2.set_options(labels=media_labels)
chart2.add_series([counts.get(label, 0) for label in media_labels])

绘制传送讯息量图表

# Pie chart comparing the tallied occurrences of the two configured names.
speakers = [YOUR_NAME, HER_NAME]
chart3 = Pie("传送讯息量")
chart3.set_options(labels=speakers, inner_radius=0)
chart3.add_series([counts.get(speaker, 0) for speaker in speakers])

定义正规式

# Regular expression for lines made of ten characters followed by a
# parenthesised word at the end of the line; the word is captured.
# The parentheses are escaped so they match literal "(" and ")".
# NOTE(review): presumably date headers such as "2023/05/01(一)" — verify
# against an actual export.
pattern = r"(?m)^.{10}\((\w+)\)(?=\n)"

# Number of matched lines per weekday label.
weekdays_counts = {}

# English weekday names and the single-character Chinese label each one is
# normalised to. Values not listed here are counted unchanged.
weekday_labels = {
    "Monday": "一",
    "Tuesday": "二",
    "Wednesday": "三",
    "Thursday": "四",
    "Friday": "五",
    "Saturday": "六",
    "Sunday": "日",
}

# Read the file content, decoding it the same way it was written.
with open("line.txt", "r", encoding="utf-8") as f:
    content = f.read()

# Find every line that matches the pattern.
matches = re.finditer(pattern, content)

# Count the matches per weekday label.
for match in matches:
    weekday = match.group(1)
    weekday = weekday_labels.get(weekday, weekday)
    weekdays_counts[weekday] = weekdays_counts.get(weekday, 0) + 1

输出日期次数

# Print one "label: count" line per weekday entry.
for weekday, count in weekdays_counts.items():
    print(f"{weekday}: {count}")

绘制星期统计图表

# Bar chart with one bar per weekday label, in the dict's insertion order.
weekday_names = list(weekdays_counts)
weekday_totals = [weekdays_counts[day] for day in weekday_names]
chart4 = Bar("星期资料统计")
chart4.set_options(labels=weekday_names, x_label="星期", y_label="次数")
chart4.add_series("次数", weekday_totals)

将图表整合成一个页面

# Put the four charts on a single page, in the order they were built.
page = Page()
for figure in (chart, chart2, chart3, chart4):
    page.add(figure)

将图表保存为HTML文件

# Render the page to an HTML file.
html_file_path = "charts.html"
page.render(html_file_path)

# Download the rendered file from Colab; the path variable is reused so the
# rendered file and the downloaded file cannot drift apart.
from google.colab import files
files.download(html_file_path)


关于作者: 网站小编

码农网专注IT技术教程资源分享平台,学习资源下载网站,58码农网包含计算机技术、网站程序源码下载、编程技术论坛、互联网资源下载等产品服务,提供原创、优质、完整内容的专业码农交流分享平台。

热门文章