/**
 * Text analysis utilities
 * Provides term frequency statistics, trend analysis, collocate analysis, and more
 */

// import _ from 'lodash'

// Chinese stop words (common function words, particles, etc.)
const chineseStopWords = [
  '的', '了', '和', '是', '在', '我', '有', '就', '不', '人', '都',
  '一', '一个', '上', '也', '很', '到', '说', '要', '去', '你', '会',
  '着', '没有', '看', '好', '自己', '这', '那', '这个', '那个', '啊',
  '吧', '呢', '啦', '呀', '吗', '哦', '哪', '对', '可以', '他', '她',
  '它', '这些', '那些', '把', '让', '向', '往', '是否', '什么', '怎么',
  '如何', '为', '为了', '依', '从', '当', '来', '被'
]

// English stop words
const englishStopWords = [
  'a', 'an', 'the', 'and', 'or', 'but', 'if', 'because', 'as', 'what',
  'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your',
  'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she',
  'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their',
  'theirs', 'themselves', 'this', 'that', 'these', 'those', 'am', 'is', 'are',
  'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do',
  'does', 'did', 'doing', 'would', 'should', 'could', 'ought', 'i\'m', 'you\'re',
  'he\'s', 'she\'s', 'it\'s', 'we\'re', 'they\'re', 'i\'ve', 'you\'ve', 'we\'ve',
  'they\'ve', 'i\'d', 'you\'d', 'he\'d', 'she\'d', 'we\'d', 'they\'d', 'i\'ll',
  'you\'ll', 'he\'ll', 'she\'ll', 'we\'ll', 'they\'ll', 'isn\'t', 'aren\'t',
  'wasn\'t', 'weren\'t', 'hasn\'t', 'haven\'t', 'hadn\'t', 'doesn\'t', 'don\'t',
  'didn\'t', 'won\'t', 'wouldn\'t', 'shan\'t', 'shouldn\'t', 'can\'t', 'cannot',
  'couldn\'t', 'mustn\'t', 'let\'s', 'that\'s', 'who\'s', 'what\'s', 'here\'s',
  'there\'s', 'when\'s', 'where\'s', 'why\'s', 'how\'s', 'of', 'on', 'at', 'in',
  'to', 'for', 'with', 'by', 'about', 'against', 'between', 'into', 'through',
  'during', 'before', 'after', 'above', 'below', 'from', 'up', 'down', 'out',
  'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there',
  'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more',
  'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same',
  'so', 'than', 'too', 'very'
]

// Combined stop word list
const stopWordsList = [...chineseStopWords, ...englishStopWords]

/**
 * Tokenizer
 * Splits on whitespace and punctuation; for Chinese text it also emits
 * overlapping 1-4 character n-grams as a lightweight stand-in for real
 * word segmentation
 *
 * @param {string} text - input text
 * @param {object} options - tokenization options
 * @returns {Array} - array of tokens
 */
export const tokenize = (text, options = {}) => {
  const defaultOptions = {
    minLength: 2, // minimum token length
    filterStopWords: true, // whether to filter stop words
    language: 'zh' // language
  }

  const opts = { ...defaultOptions, ...options }

  if (!text || typeof text !== 'string') {
    return []
  }

  // Preprocess: lowercase, replace ASCII and CJK punctuation (including
  // 。 , 、, which the original list missed) with spaces, collapse whitespace
  let processedText = text.toLowerCase()
    .replace(/[.,?!;:()[\]{}""''「」『』【】《》〈〉。,、]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim()

  // Split on spaces (effective for English words and numbers)
  let tokens = processedText.split(' ')

  // For Chinese, perform additional character-level splitting
  if (opts.language === 'zh') {
    tokens = tokens.reduce((result, token) => {
      // If the current token is purely Chinese
      if (/^[\u4e00-\u9fa5]+$/.test(token)) {
        // Two-character combinations
        for (let i = 0; i < token.length - 1; i++) {
          result.push(token.substring(i, i + 2))
        }

        // Three-character combinations
        if (token.length >= 3) {
          for (let i = 0; i < token.length - 2; i++) {
            result.push(token.substring(i, i + 3))
          }
        }

        // Four-character combinations
        if (token.length >= 4) {
          for (let i = 0; i < token.length - 3; i++) {
            result.push(token.substring(i, i + 4))
          }
        }

        // Also add the single characters
        for (let i = 0; i < token.length; i++) {
          result.push(token[i])
        }
      } else {
        // Non-Chinese tokens are added as-is
        result.push(token)
      }
      return result
    }, [])
  }

  // Drop tokens shorter than the minimum length
  tokens = tokens.filter(token => token.length >= opts.minLength)

  // Optionally filter stop words
  if (opts.filterStopWords) {
    tokens = tokens.filter(token => !stopWordsList.includes(token))
  }

  return tokens
}
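
// Illustrative usage of tokenize (a sketch; with the defaults above, a mixed
// sentence yields Chinese character n-grams plus whole English words):
//   tokenize('数据分析 data analysis', { language: 'zh' })
//   // => ['数据', '据分', '分析', '数据分', '据分析', '数据分析', 'data', 'analysis']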

/**
 * Term frequency analysis
 *
 * @param {Array} segments - array of text segments
 * @param {Object} options - analysis options
 * @returns {Array} - term frequency statistics
 */
export const analyzeTermFrequency = (segments, options = {}) => {
  const defaultOptions = {
    minLength: 2,
    filterStopWords: true,
    language: 'zh',
    limit: 100 // maximum number of terms to return
  }

  const opts = { ...defaultOptions, ...options }

  if (!segments || !Array.isArray(segments) || segments.length === 0) {
    return []
  }

  // Concatenate the text of all segments
  const allText = segments.map(segment => segment.text).join(' ')

  // Tokenize
  const tokens = tokenize(allText, {
    minLength: opts.minLength,
    filterStopWords: opts.filterStopWords,
    language: opts.language
  })

  // Count token frequencies
  const frequencyMap = tokens.reduce((acc, token) => {
    acc[token] = (acc[token] || 0) + 1
    return acc
  }, {})

  // Convert to an array, sort by count, and truncate
  const sortedTerms = Object.entries(frequencyMap)
    .map(([term, count]) => ({
      term,
      count,
      percentage: count / tokens.length
    }))
    .sort((a, b) => b.count - a.count)
    .slice(0, opts.limit)

  return sortedTerms
}
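
// Illustrative usage (hypothetical segments; each segment needs a `text` field):
//   analyzeTermFrequency([{ text: '销售增长' }, { text: '销售下降' }], { limit: 3 })
//   // => [{ term: '销售', count: 2, percentage: 2 / 12 }, ...]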

/**
 * Trend analysis
 *
 * @param {Array} segments - array of text segments
 * @param {Array} terms - terms whose trends should be analyzed
 * @param {Object} options - analysis options
 * @returns {Object} - trend analysis result
 */
export const analyzeTermTrends = (segments, terms, options = {}) => {
  const defaultOptions = {
    minLength: 2,
    filterStopWords: true,
    language: 'zh',
    normalization: 'relative' // 'raw' or 'relative'
  }

  const opts = { ...defaultOptions, ...options }

  if (!segments || !Array.isArray(segments) || segments.length === 0) {
    return { terms: [], data: [] }
  }

  if (!terms || !Array.isArray(terms) || terms.length === 0) {
    return { terms: [], data: [] }
  }

  // Compute the frequency of each term within each segment
  const trendsData = segments.map(segment => {
    // Tokenize
    const tokens = tokenize(segment.text, {
      minLength: opts.minLength,
      filterStopWords: opts.filterStopWords,
      language: opts.language
    })

    // Total token count of this segment (used for relative frequency)
    const totalTokens = tokens.length

    // Frequency of each requested term within this segment
    const segmentFrequencies = {}

    // Initialize every term's frequency to 0
    terms.forEach(term => {
      segmentFrequencies[term] = 0
    })

    // Count occurrences
    tokens.forEach(token => {
      if (terms.includes(token)) {
        segmentFrequencies[token]++
      }
    })

    // For relative frequencies, divide each count by the total token count
    if (opts.normalization === 'relative' && totalTokens > 0) {
      Object.keys(segmentFrequencies).forEach(term => {
        segmentFrequencies[term] = segmentFrequencies[term] / totalTokens
      })
    }

    return {
      position: segment.position,
      id: segment.id,
      frequencies: segmentFrequencies
    }
  })

  // Shape the data for chart rendering
  return {
    terms: terms,
    data: trendsData
  }
}
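
// Illustrative usage (hypothetical segments carrying `id`, `position`, and `text`):
//   analyzeTermTrends(segments, ['销售', '成本'])
//   // => { terms: ['销售', '成本'],
//   //      data: [{ position: 0, id: 's1', frequencies: { '销售': 0.05, '成本': 0 } }, ...] }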

/**
 * Collocate analysis
 *
 * @param {Array} segments - array of text segments
 * @param {String} targetTerm - target term
 * @param {Object} options - analysis options
 * @returns {Array} - collocate analysis result
 */
export const analyzeCollocates = (segments, targetTerm, options = {}) => {
  const defaultOptions = {
    minLength: 2,
    filterStopWords: true,
    language: 'zh',
    window: 5, // context window size (in tokens)
    limit: 50 // maximum number of collocates to return
  }

  const opts = { ...defaultOptions, ...options }

  if (!segments || !Array.isArray(segments) || segments.length === 0) {
    return []
  }

  if (!targetTerm || typeof targetTerm !== 'string') {
    return []
  }

  const collocatesMap = {}

  // Iterate over every segment
  segments.forEach(segment => {
    // Tokenize
    const tokens = tokenize(segment.text, {
      minLength: 1, // set to 1 to capture every candidate token
      filterStopWords: false, // defer stop word filtering until below
      language: opts.language
    })

    // Find every position of the target term in the token array
    const targetIndices = []
    tokens.forEach((token, index) => {
      if (token === targetTerm) {
        targetIndices.push(index)
      }
    })

    // For each occurrence of the target term, collect the surrounding tokens
    targetIndices.forEach(targetIndex => {
      // Compute the bounds of the context window
      const start = Math.max(0, targetIndex - opts.window)
      const end = Math.min(tokens.length - 1, targetIndex + opts.window)

      // Collect the tokens within the window
      for (let i = start; i <= end; i++) {
        if (i !== targetIndex) { // skip the target term itself
          const collocate = tokens[i]

          // Ignore tokens shorter than the minimum length
          if (collocate.length < opts.minLength) {
            continue
          }

          // Optionally skip stop words
          if (opts.filterStopWords && stopWordsList.includes(collocate)) {
            continue
          }

          // Count the collocate
          collocatesMap[collocate] = (collocatesMap[collocate] || 0) + 1
        }
      }
    })
  })

  // Convert to an array, sort by count, and truncate
  const sortedCollocates = Object.entries(collocatesMap)
    .map(([term, count]) => ({
      term,
      count
    }))
    .sort((a, b) => b.count - a.count)
    .slice(0, opts.limit)

  return sortedCollocates
}
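
// Illustrative usage: which tokens appear within `window` tokens of '销售'?
//   analyzeCollocates(segments, '销售', { window: 5, limit: 20 })
//   // => [{ term: '增长', count: 12 }, ...] (counts invented for illustration)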

/**
 * Context (keyword-in-context) analysis
 *
 * @param {Array} segments - array of text segments
 * @param {String} targetTerm - target term
 * @param {Object} options - analysis options
 * @returns {Array} - context analysis result
 */
export const analyzeContexts = (segments, targetTerm, options = {}) => {
  const defaultOptions = {
    window: 8, // context window size (characters on each side)
    limit: 50 // maximum number of contexts to return
  }

  const opts = { ...defaultOptions, ...options }

  if (!segments || !Array.isArray(segments) || segments.length === 0) {
    return []
  }

  if (!targetTerm || typeof targetTerm !== 'string') {
    return []
  }

  const contexts = []

  // Iterate over every segment
  segments.forEach(segment => {
    const text = segment.text
    const segmentId = segment.id

    // Find every position of the target term in the text
    let position = 0
    let foundPos = text.indexOf(targetTerm, position)

    while (foundPos !== -1 && contexts.length < opts.limit) {
      // Compute the bounds of the context window
      const startPos = Math.max(0, foundPos - opts.window)
      const endPos = Math.min(text.length, foundPos + targetTerm.length + opts.window)

      // Extract the context
      const before = text.substring(startPos, foundPos)
      const after = text.substring(foundPos + targetTerm.length, endPos)

      // Append to the result list
      contexts.push({
        segmentId,
        position: foundPos,
        before,
        term: targetTerm,
        after
      })

      // Continue searching from the next position
      position = foundPos + targetTerm.length
      foundPos = text.indexOf(targetTerm, position)
    }
  })

  return contexts.slice(0, opts.limit)
}
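
// Illustrative usage (concordance-style output):
//   analyzeContexts([{ id: 's1', text: '今年销售增长明显' }], '销售')
//   // => [{ segmentId: 's1', position: 2, before: '今年', term: '销售', after: '增长明显' }]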

/**
 * Generate word cloud data
 * @param {Array} termFrequency - term frequency data with `term` and `frequency` fields
 * @param {Number} maxCount - maximum number of terms to display
 * @param {Object} options - configuration options
 * @param {Number} options.minSize - minimum font size
 * @param {Number} options.maxSize - maximum font size
 * @returns {Array} word cloud data
 */
export function generateWordCloudData(termFrequency, maxCount = 100, options = {}) {
  if (!termFrequency || termFrequency.length === 0) {
    return []
  }

  const { minSize = 12, maxSize = 32 } = options
  const sortedData = [...termFrequency].sort((a, b) => b.frequency - a.frequency)
  const slicedData = sortedData.slice(0, maxCount)

  // Find the maximum and minimum frequencies
  const maxFreq = slicedData[0].frequency
  const minFreq = slicedData[slicedData.length - 1].frequency

  // Compute a size for each term
  return slicedData.map(item => {
    // Map frequency to font size (linear interpolation)
    let size = minSize
    if (maxFreq !== minFreq) {
      size = minSize + ((item.frequency - minFreq) / (maxFreq - minFreq)) * (maxSize - minSize)
    }

    return {
      name: item.term,
      value: item.frequency,
      size: Math.round(size)
    }
  })
}
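
// Illustrative usage: the most frequent term gets maxSize, the least frequent
// gets minSize, and everything in between is interpolated linearly:
//   generateWordCloudData([{ term: '数据', frequency: 10 }, { term: '分析', frequency: 2 }])
//   // => [{ name: '数据', value: 10, size: 32 }, { name: '分析', value: 2, size: 12 }]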

/**
 * Extract keywords
 * @param {String} text - text content
 * @param {Object} options - configuration options
 * @param {Number} options.count - number of keywords to extract
 * @returns {Array} array of keywords
 */
export function extractKeywords(text, options = {}) {
  const { count = 10 } = options

  // Simplified implementation; real applications may need a more
  // sophisticated algorithm
  if (!text) return []

  // Split the text into words and count frequencies (simplified)
  const words = text.split(/\s+|,|。|;|:|!|?|,|\.|;|:|!|\?/)
    .filter(word => word.length > 1) // drop single characters and empty strings

  const wordFreq = {}
  words.forEach(word => {
    wordFreq[word] = (wordFreq[word] || 0) + 1
  })

  // Convert to an array, sort by score, and truncate
  return Object.keys(wordFreq)
    .map(term => ({ term, score: wordFreq[term] }))
    .sort((a, b) => b.score - a.score)
    .slice(0, count)
}
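
// Illustrative usage: phrases are ranked by raw occurrence count:
//   extractKeywords('销售增长,销售增长,成本下降', { count: 2 })
//   // => [{ term: '销售增长', score: 2 }, { term: '成本下降', score: 1 }]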

/**
 * Sentiment analysis
 * @param {String} text - text content
 * @returns {Object} sentiment analysis result
 */
export function analyzeSentiment(text) {
  if (!text) {
    return {
      score: 0,
      label: '中性',
      positiveCount: 0,
      negativeCount: 0,
      neutralCount: 0
    }
  }

  // Simplified sentiment lexicons
  const positiveWords = [
    '优秀', '良好', '满意', '喜欢', '赞', '好', '优化', '提高', '增长',
    '成功', '积极', '优点', '强', '高效', '精确', '合适', '顺利'
  ]

  const negativeWords = [
    '差', '失败', '缺点', '问题', '错误', '缺陷', '不良', '不足',
    '弱', '低效', '不精确', '不合适', '困难', '危险', '降低', '减少'
  ]

  // Simple counts of positive and negative word occurrences
  let positiveCount = 0
  let negativeCount = 0
  let neutralCount = 0

  // Split into rough clauses (simplified tokenization)
  const words = text.split(/\s+|,|。|;|:|!|?|,|\.|;|:|!|\?/)
    .filter(word => word.length > 0)

  words.forEach(word => {
    if (positiveWords.some(pw => word.includes(pw))) {
      positiveCount++
    } else if (negativeWords.some(nw => word.includes(nw))) {
      negativeCount++
    } else {
      neutralCount++
    }
  })

  // Sentiment score in [-1, 1]: -1 is extremely negative, 1 extremely positive
  const totalWords = positiveCount + negativeCount + neutralCount
  let score = 0

  if (totalWords > 0) {
    score = (positiveCount - negativeCount) / totalWords
  }

  // Determine the sentiment label
  let label = '中性'
  if (score > 0.6) label = '非常积极'
  else if (score > 0.2) label = '积极'
  else if (score > -0.2) label = '中性'
  else if (score > -0.6) label = '消极'
  else label = '非常消极'

  return {
    score,
    label,
    positiveCount,
    negativeCount,
    neutralCount
  }
}
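
// Illustrative usage: score = (positive - negative) / total clauses. One
// positive clause ('增长') and one negative clause ('问题') cancel out:
//   analyzeSentiment('销售增长顺利,成本问题严重')
//   // => { score: 0, label: '中性', positiveCount: 1, negativeCount: 1, neutralCount: 0 }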

/**
 * Group terms by category
 * @param {Array} termFrequency - term frequency data
 * @param {Object} categories - category dictionary: keys are category names, values are arrays of words
 * @returns {Object} terms grouped by category
 */
export function groupTermsByCategory(termFrequency, categories) {
  if (!termFrequency || !categories) {
    return {}
  }

  const result = {}

  // Initialize the result object
  Object.keys(categories).forEach(category => {
    result[category] = []
  })

  // Iterate over the term frequency data
  termFrequency.forEach(item => {
    const { term } = item

    // Check which categories the term belongs to
    Object.keys(categories).forEach(category => {
      const categoryWords = categories[category]
      // Match if the term is in the category's word list, or contains one of its words
      if (categoryWords.includes(term) || categoryWords.some(word => term.includes(word))) {
        result[category].push(item)
      }
    })
  })

  return result
}
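
// Illustrative usage (a term can land in several categories at once):
//   groupTermsByCategory(
//     [{ term: '销售增长', frequency: 5 }],
//     { 财务: ['销售', '成本'], 趋势: ['增长', '下降'] }
//   )
//   // => { 财务: [{ term: '销售增长', frequency: 5 }], 趋势: [{ term: '销售增长', frequency: 5 }] }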

/**
 * Compute text similarity
 * @param {String} text1 - first text
 * @param {String} text2 - second text
 * @returns {Object} similarity analysis result
 */
export function calculateTextSimilarity(text1, text2) {
  if (!text1 || !text2) {
    return {
      similarity: 0,
      commonWords: [],
      uniqueWords1: [],
      uniqueWords2: []
    }
  }

  // Simplified implementation: split both texts into words
  const words1 = text1.split(/\s+|,|。|;|:|!|?|,|\.|;|:|!|\?/)
    .filter(word => word.length > 1)
  const words2 = text2.split(/\s+|,|。|;|:|!|?|,|\.|;|:|!|\?/)
    .filter(word => word.length > 1)

  // Count word frequencies
  const freq1 = {}
  const freq2 = {}

  words1.forEach(word => {
    freq1[word] = (freq1[word] || 0) + 1
  })

  words2.forEach(word => {
    freq2[word] = (freq2[word] || 0) + 1
  })

  // Find shared and unique words
  const commonWords = []
  const uniqueWords1 = []
  const uniqueWords2 = []

  Object.keys(freq1).forEach(word => {
    if (freq2[word]) {
      commonWords.push({
        term: word,
        freq1: freq1[word],
        freq2: freq2[word]
      })
    } else {
      uniqueWords1.push({
        term: word,
        frequency: freq1[word]
      })
    }
  })

  Object.keys(freq2).forEach(word => {
    if (!freq1[word]) {
      uniqueWords2.push({
        term: word,
        frequency: freq2[word]
      })
    }
  })

  // Jaccard similarity: |intersection| / |union| of the two word sets
  const allWords = new Set([...Object.keys(freq1), ...Object.keys(freq2)])
  const intersection = commonWords.length
  const similarity = intersection / allWords.size

  return {
    similarity,
    commonWords,
    uniqueWords1,
    uniqueWords2
  }
}
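
// Illustrative usage: with word sets {销售, 增长} and {增长, 成本},
// the intersection has 1 word and the union has 3, so Jaccard = 1/3:
//   calculateTextSimilarity('销售 增长', '增长 成本')
//   // => { similarity: 0.333..., commonWords: [{ term: '增长', freq1: 1, freq2: 1 }], ... }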

/**
 * Convert term frequency data to CSV
 * @param {Array} termFrequency - term frequency data
 * @returns {String} data in CSV format
 */
export function exportTermFrequencyToCsv(termFrequency) {
  if (!termFrequency || !termFrequency.length) {
    return ''
  }

  // CSV header (columns: term, frequency, percentage; kept in Chinese for the UI)
  let csvContent = '术语,频率,百分比\n'

  // Total frequency across all terms
  const totalFrequency = termFrequency.reduce((sum, item) => sum + item.frequency, 0)

  // Append one row per term
  termFrequency.forEach(item => {
    const percentage = ((item.frequency / totalFrequency) * 100).toFixed(2)
    csvContent += `${item.term},${item.frequency},${percentage}%\n`
  })

  return csvContent
}
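
// Illustrative usage:
//   exportTermFrequencyToCsv([{ term: '数据', frequency: 3 }, { term: '分析', frequency: 1 }])
//   // => '术语,频率,百分比\n数据,3,75.00%\n分析,1,25.00%\n'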

/**
 * Generate mock term frequency data (for testing)
 * @param {Number} count - number of terms
 * @returns {Array} mock term frequency data
 */
export function generateMockTermFrequency(count = 50) {
  const terms = [
    '数据', '分析', '报告', '增长', '下降', '趋势', '预测', '计划',
    '指标', '目标', '完成', '销售', '产品', '市场', '客户', '服务',
    '研发', '技术', '创新', '战略', '管理', '团队', '协作', '效率',
    '质量', '成本', '收入', '利润', '预算', '投资', '回报', '风险',
    '机会', '挑战', '问题', '解决', '方案', '实施', '评估', '改进',
    '优化', '整合', '扩展', '收缩', '专注', '多元', '流程', '体系',
    '标准', '规范', '合规', '审计', '监控', '决策', '执行', '反馈',
    '调整', '变革', '稳定', '增值', '季度', '年度', '月度', '周期',
    '短期', '长期', '快速', '稳健', '领先', '落后', '核心', '边缘',
    '内部', '外部', '上游', '下游', '供应', '需求', '库存', '周转',
    '提高', '降低', '强化', '弱化', '轻量', '重量', '敏捷', '刚性',
    '柔性', '弹性', '创造', '破坏', '构建', '拆解', '增加', '减少'
  ]

  // Never request more terms than are available
  const actualCount = Math.min(count, terms.length)
  const selectedTerms = []

  // Pick terms at random
  while (selectedTerms.length < actualCount) {
    const randomIndex = Math.floor(Math.random() * terms.length)
    const term = terms[randomIndex]

    if (!selectedTerms.find(item => item.term === term)) {
      // Random frequency, biased so earlier picks tend to score higher
      // (approximating a long-tail distribution); the Math.max guard keeps
      // the bias component non-negative for large counts
      const frequency = Math.floor(Math.random() * 100) +
        Math.floor(Math.random() * Math.max(0, 100 - selectedTerms.length * 2))

      selectedTerms.push({
        term,
        frequency
      })
    }
  }

  // Sort by descending frequency
  return selectedTerms.sort((a, b) => b.frequency - a.frequency)
}
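
// Illustrative usage (output is random on every call):
//   generateMockTermFrequency(3)
//   // => e.g. [{ term: '市场', frequency: 153 }, { term: '数据', frequency: 88 }, { term: '风险', frequency: 12 }]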

/**
 * Generate term co-occurrence network data
 *
 * @param {Array} segments - array of text segments
 * @param {Array} terms - terms whose relationships should be analyzed
 * @param {Object} options - analysis options
 * @returns {Object} - network data (nodes and links)
 */
export const generateTermLinks = (segments, terms, options = {}) => {
  const defaultOptions = {
    minCooccurrence: 2, // minimum number of co-occurrences
    window: 50, // co-occurrence window size (characters within a segment)
    maxLinks: 50 // maximum number of links
  }

  const opts = { ...defaultOptions, ...options }

  if (!segments || !Array.isArray(segments) || segments.length === 0) {
    return { nodes: [], links: [] }
  }

  if (!terms || !Array.isArray(terms) || terms.length < 2) {
    return { nodes: [], links: [] }
  }

  // Initialize the co-occurrence matrix
  const cooccurrenceMatrix = {}
  terms.forEach(term1 => {
    cooccurrenceMatrix[term1] = {}
    terms.forEach(term2 => {
      if (term1 !== term2) {
        cooccurrenceMatrix[term1][term2] = 0
      }
    })
  })

  // Count term co-occurrences
  segments.forEach(segment => {
    const text = segment.text

    // For each pair of terms, check whether they co-occur within the window
    terms.forEach(term1 => {
      // Find every position of term1 in the text
      const positions1 = []
      let foundPos1 = text.indexOf(term1)
      while (foundPos1 !== -1) {
        positions1.push(foundPos1)
        foundPos1 = text.indexOf(term1, foundPos1 + term1.length)
      }

      // For each position of term1, check whether the other terms fall within the window
      terms.forEach(term2 => {
        if (term1 !== term2) {
          // Find every position of term2 in the text
          const positions2 = []
          let foundPos2 = text.indexOf(term2)
          while (foundPos2 !== -1) {
            positions2.push(foundPos2)
            foundPos2 = text.indexOf(term2, foundPos2 + term2.length)
          }

          // Count every pair of occurrences that falls within the window
          positions1.forEach(pos1 => {
            positions2.forEach(pos2 => {
              if (Math.abs(pos1 - pos2) <= opts.window) {
                cooccurrenceMatrix[term1][term2]++
              }
            })
          })
        }
      })
    })
  })

  // Build the node and link data
  const nodes = terms.map(term => ({
    name: term,
    value: 1 // could be replaced with the term's frequency
  }))

  let links = []

  // Add links (note: each qualifying pair is emitted in both directions)
  terms.forEach(term1 => {
    terms.forEach(term2 => {
      if (term1 !== term2 && cooccurrenceMatrix[term1][term2] >= opts.minCooccurrence) {
        links.push({
          source: term1,
          target: term2,
          value: cooccurrenceMatrix[term1][term2]
        })
      }
    })
  })

  // Sort by co-occurrence count and cap the number of links
  links = links.sort((a, b) => b.value - a.value).slice(0, opts.maxLinks)

  return { nodes, links }
}
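
// Illustrative usage (the { name/value } nodes and { source/target/value } links
// match the shape an ECharts graph series expects; each qualifying pair currently
// yields two directed links, A->B and B->A):
//   generateTermLinks(segments, ['销售', '成本', '利润'], { minCooccurrence: 2 })
//   // => { nodes: [{ name: '销售', value: 1 }, ...],
//   //      links: [{ source: '销售', target: '成本', value: 4 }, ...] }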