const fs = require('fs');
const lodash = require('lodash');
const csv = require('fast-csv');

// Input CSV (must have a header row containing a `chat` column) and output report.
const INPUT_FILE = 'chat.txt';
const OUTPUT_FILE = 'statistics.json';
// Substring searched for in each chat message (case-sensitive match).
const KEYWORD = 'ai';
// How many of the most frequent chat messages to report.
const TOP_N = 8;

/**
 * Read a CSV file with a header row and collect its rows as objects.
 *
 * fast-csv's `parseFile` returns a stream rather than accepting a callback,
 * so the rows are gathered from its events. With `strictColumnHandling`
 * enabled, rows whose column count does not match the header are reported
 * through `data-invalid`; they are skipped here with a warning.
 *
 * @param {string} filePath - Path of the CSV file to read.
 * @returns {Promise<Object[]>} Resolves with the well-formed rows once the
 *   stream ends; rejects with the stream's error.
 */
function readRows(filePath) {
  return new Promise((resolve, reject) => {
    const rows = [];
    csv
      .parseFile(filePath, { headers: true, strictColumnHandling: true })
      .on('error', reject)
      .on('data-invalid', (row, rowNumber) => {
        console.warn(`Skipping malformed row ${rowNumber}: ${JSON.stringify(row)}`);
      })
      .on('data', (row) => rows.push(row))
      .on('end', () => resolve(rows));
  });
}

/**
 * Build the report object from parsed rows.
 *
 * @param {Object[]} rows - Parsed CSV rows; only rows with a string `chat`
 *   value are considered.
 * @param {string} keyword - Substring to look for in each `chat` value.
 * @returns {{stats: Object[], top8: Object[], keywordData: Object[]}}
 *   `stats` holds the keyword and its match count, `top8` the most frequent
 *   chat messages as `{ chat, count }` in descending count order, and
 *   `keywordData` the rows whose `chat` contains the keyword.
 */
function buildStatistics(rows, keyword) {
  // A row without a `chat` value would make `.includes` throw, so drop it up front.
  const chatRows = rows.filter((row) => typeof row.chat === 'string');
  const keywordData = chatRows.filter((row) => row.chat.includes(keyword));

  // Counts identical chat messages (not individual words).
  const chatCounts = lodash.countBy(chatRows, 'chat');
  const rankedPairs = lodash.orderBy(
    lodash.toPairs(chatCounts),
    [(pair) => pair[1]],
    ['desc']
  );
  const top8 = lodash
    .take(rankedPairs, TOP_N)
    .map(([chat, count]) => ({ chat, count }));

  return {
    stats: [{ Keyword: keyword, Count: keywordData.length }],
    top8,
    keywordData,
  };
}

readRows(INPUT_FILE)
  .then((rows) => {
    const results = buildStatistics(rows, KEYWORD);
    fs.writeFileSync(OUTPUT_FILE, JSON.stringify(results, null, 2), 'utf-8');
  })
  .catch((err) => {
    // Keep a non-zero exit status, as the original `throw err` would have produced.
    console.error(`Failed to build ${OUTPUT_FILE} from ${INPUT_FILE}:`, err);
    process.exitCode = 1;
  });