|
|
/*
|
|
|
* tree06.cpp
|
|
|
*
|
|
|
* Created on: Apr 28, 2024
|
|
|
* Author: 28032
|
|
|
*/
|
|
|
|
|
|
//#include <iostream>
|
|
|
//#include <fstream>
|
|
|
//#include <vector>
|
|
|
//#include <algorithm>
|
|
|
//#include <chrono>
|
|
|
//
|
|
|
//using namespace std;
|
|
|
//
|
|
|
//class Trie
|
|
|
//{
|
|
|
//public:
|
|
|
// Trie()
|
|
|
// {
|
|
|
// for(int i = 0;i < 26;i++)
|
|
|
// children[i] = NULL;
|
|
|
// wordcount = 0;
|
|
|
// }
|
|
|
// Trie(int cnt)
|
|
|
// {
|
|
|
// for(int i = 0;i < 26;i++)
|
|
|
// children[i] = NULL;
|
|
|
// wordcount = cnt;
|
|
|
// }
|
|
|
// Trie* children[26];
|
|
|
// int wordcount;
|
|
|
//}*trie;
|
|
|
//
|
|
|
//Trie* addWord(Trie* last,int site)
|
|
|
//{
|
|
|
// if(last->children[site])
|
|
|
// return last->children[site];
|
|
|
// else
|
|
|
// return last->children[site] = new Trie();
|
|
|
//}
|
|
|
//
|
|
|
//vector<pair<string,int> > ranking;
|
|
|
//
|
|
|
//void showWord(Trie* root,string w)
|
|
|
//{
|
|
|
// if(root->wordcount)
|
|
|
// ranking.push_back(make_pair(w,root->wordcount));
|
|
|
// for(int i = 0;i < 26;i++)
|
|
|
// {
|
|
|
// if(root->children[i])
|
|
|
// {
|
|
|
// showWord(root->children[i],w+(char)('a'+i));
|
|
|
// }
|
|
|
// }
|
|
|
//}
|
|
|
//
|
|
|
//bool cmp(pair<string,int>& a,pair<string,int>& b)
|
|
|
//{
|
|
|
// if(a.second != b.second)
|
|
|
// return a.second > b.second;
|
|
|
//
|
|
|
// return a.first < b.first;
|
|
|
//}
|
|
|
//int main()
|
|
|
//{
|
|
|
// ifstream fp("in.txt",ios::in);
|
|
|
// trie = new Trie;
|
|
|
//
|
|
|
// string line;
|
|
|
// auto begin = std::chrono::high_resolution_clock::now();
|
|
|
// while(getline(fp,line))
|
|
|
// {
|
|
|
// int l = line.length();
|
|
|
// Trie* current = trie;
|
|
|
// for(int i = 0;i < l;i++)
|
|
|
// {
|
|
|
// if('a'<=line[i] && line[i]<='z')
|
|
|
// {
|
|
|
// current = addWord(current,line[i]-'a');
|
|
|
// }
|
|
|
// else if('A'<=line[i] && line[i]<='Z')
|
|
|
// {
|
|
|
// current = addWord(current,line[i]-'A');
|
|
|
// }
|
|
|
// else
|
|
|
// {
|
|
|
// if(current!=trie)
|
|
|
// current->wordcount++;
|
|
|
// current = trie;
|
|
|
// }
|
|
|
// }
|
|
|
// }
|
|
|
//
|
|
|
// showWord(trie,"");
|
|
|
//
|
|
|
// sort(ranking.begin(),ranking.end(),cmp);
|
|
|
//
|
|
|
// if(ranking.size()<100)
|
|
|
// for(auto it:ranking)
|
|
|
// cout<<it.first<<" "<<it.second<<endl;
|
|
|
// else
|
|
|
// for(auto it = ranking.begin();it!=ranking.begin()+100;it++)
|
|
|
// cout<<it->first<<" "<<it->second<<endl;
|
|
|
//
|
|
|
// auto end = std::chrono::high_resolution_clock::now();
|
|
|
// auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
|
|
|
// printf("Time measured: %.5f seconds.\n", elapsed.count() * 1e-9);
|
|
|
// return 0;
|
|
|
//}
|
|
|
|
|
|
//#include <iostream>
|
|
|
//#include <fstream>
|
|
|
//#include <vector>
|
|
|
//#include <algorithm>
|
|
|
//#include <chrono>
|
|
|
//
|
|
|
//using namespace std;
|
|
|
//
|
|
|
//class Trie
|
|
|
//{
|
|
|
//public:
|
|
|
// Trie()
|
|
|
// {
|
|
|
// children = NULL;
|
|
|
// sibling =NULL;
|
|
|
// wordsite = 28;
|
|
|
// wordcount = 0;
|
|
|
// }
|
|
|
// Trie(int site,int cnt)
|
|
|
// {
|
|
|
// children = NULL;
|
|
|
// sibling = NULL;
|
|
|
// wordsite = site;
|
|
|
// wordcount = cnt;
|
|
|
// }
|
|
|
// Trie* children;
|
|
|
// Trie* sibling;
|
|
|
// int wordsite;
|
|
|
// int wordcount;
|
|
|
//}*trie;
|
|
|
//
|
|
|
//Trie* addWord(Trie* last,int site)
|
|
|
//{
|
|
|
// Trie* current = last->children;
|
|
|
//
|
|
|
// if(current)
|
|
|
// {
|
|
|
// while(current)
|
|
|
// {
|
|
|
// if(current->wordsite == site)
|
|
|
// return current;
|
|
|
// current = current->sibling;
|
|
|
// }
|
|
|
// current = last->children;
|
|
|
// while(current->sibling)
|
|
|
// current = current->sibling;
|
|
|
// return current->sibling = new Trie(site,0);
|
|
|
// }
|
|
|
// else
|
|
|
// {
|
|
|
// return last->children = new Trie(site,0);
|
|
|
// }
|
|
|
//}
|
|
|
//
|
|
|
//vector<pair<string,int> > ranking;
|
|
|
//
|
|
|
//void showWord(Trie* root,string w)
|
|
|
//{
|
|
|
// if(root->wordcount)
|
|
|
// ranking.push_back(make_pair(w,root->wordcount));
|
|
|
//
|
|
|
// for(Trie* i = root->children;i !=NULL;i = i->sibling)
|
|
|
// {
|
|
|
// showWord(i,w+(char)('a'+i->wordsite));
|
|
|
// }
|
|
|
//}
|
|
|
//
|
|
|
//bool cmp(pair<string,int>& a,pair<string,int>& b)
|
|
|
//{
|
|
|
// if(a.second != b.second)
|
|
|
// return a.second > b.second;
|
|
|
//
|
|
|
// return a.first < b.first;
|
|
|
//}
|
|
|
//int main()
|
|
|
//{
|
|
|
// ifstream fp("in.txt",ios::in);
|
|
|
// trie = new Trie;
|
|
|
//
|
|
|
// string line;
|
|
|
// auto begin = std::chrono::high_resolution_clock::now();
|
|
|
// while(getline(fp,line))
|
|
|
// {
|
|
|
// int l = line.length();
|
|
|
// Trie* current = trie;
|
|
|
// for(int i = 0;i < l;i++)
|
|
|
// {
|
|
|
// if('a'<=line[i] && line[i]<='z')
|
|
|
// {
|
|
|
// current = addWord(current,line[i]-'a');
|
|
|
// }
|
|
|
// else if('A'<=line[i] && line[i]<='Z')
|
|
|
// {
|
|
|
// current = addWord(current,line[i]-'A');
|
|
|
// }
|
|
|
// else
|
|
|
// {
|
|
|
// if(current!=trie)
|
|
|
// current->wordcount++;
|
|
|
// current = trie;
|
|
|
// }
|
|
|
// }
|
|
|
// }
|
|
|
//
|
|
|
// showWord(trie,"");
|
|
|
//
|
|
|
// sort(ranking.begin(),ranking.end(),cmp);
|
|
|
//
|
|
|
// if(ranking.size()<100)
|
|
|
// for(auto it:ranking)
|
|
|
// cout<<it.first<<" "<<it.second<<endl;
|
|
|
// else
|
|
|
// for(auto it = ranking.begin();it!=ranking.begin()+100;it++)
|
|
|
// cout<<it->first<<" "<<it->second<<endl;
|
|
|
//
|
|
|
// auto end = std::chrono::high_resolution_clock::now();
|
|
|
// auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
|
|
|
// printf("Time measured: %.5f seconds.\n", elapsed.count() * 1e-9);
|
|
|
// return 0;
|
|
|
//}
|
|
|
|
|
|
//#include <iostream>
|
|
|
//#include <fstream>
|
|
|
//#include <vector>
|
|
|
//#include <algorithm>
|
|
|
//#include <chrono>
|
|
|
//#include <thread>
|
|
|
//#include <mutex>
|
|
|
//
|
|
|
//using namespace std;
|
|
|
//
|
|
|
//mutex mtx;
|
|
|
//
|
|
|
//class Trie
|
|
|
//{
|
|
|
//public:
|
|
|
// Trie()
|
|
|
// {
|
|
|
// children = NULL;
|
|
|
// sibling =NULL;
|
|
|
// wordsite = 28;
|
|
|
// wordcount = 0;
|
|
|
// }
|
|
|
// Trie(int site,int cnt)
|
|
|
// {
|
|
|
// children = NULL;
|
|
|
// sibling = NULL;
|
|
|
// wordsite = site;
|
|
|
// wordcount = cnt;
|
|
|
// }
|
|
|
// Trie* children;
|
|
|
// Trie* sibling;
|
|
|
// int wordsite;
|
|
|
// int wordcount;
|
|
|
//}*trie;
|
|
|
//
|
|
|
//Trie* addWord(Trie* last,int site)
|
|
|
//{
|
|
|
// Trie* current = last->children;
|
|
|
//
|
|
|
// if(current)
|
|
|
// {
|
|
|
// while(current)
|
|
|
// {
|
|
|
// if(current->wordsite == site)
|
|
|
// return current;
|
|
|
// current = current->sibling;
|
|
|
// }
|
|
|
// current = last->children;
|
|
|
// while(current->sibling)
|
|
|
// current = current->sibling;
|
|
|
// return current->sibling = new Trie(site,0);
|
|
|
// }
|
|
|
// else
|
|
|
// {
|
|
|
// return last->children = new Trie(site,0);
|
|
|
// }
|
|
|
//}
|
|
|
//
|
|
|
//vector<pair<string,int> > ranking;
|
|
|
//
|
|
|
//void showWord(Trie* root,string w)
|
|
|
//{
|
|
|
// if(root->wordcount)
|
|
|
// ranking.push_back(make_pair(w,root->wordcount));
|
|
|
//
|
|
|
// for(Trie* i = root->children;i !=NULL;i = i->sibling)
|
|
|
// {
|
|
|
// showWord(i,w+(char)('a'+i->wordsite));
|
|
|
// }
|
|
|
//}
|
|
|
//
|
|
|
//bool cmp(pair<string,int>& a,pair<string,int>& b)
|
|
|
//{
|
|
|
// if(a.second != b.second)
|
|
|
// return a.second > b.second;
|
|
|
//
|
|
|
// return a.first < b.first;
|
|
|
//}
|
|
|
//
|
|
|
//void buildTrie(void)
|
|
|
//{
|
|
|
// ifstream fp("in.txt",ios::in);
|
|
|
// trie = new Trie;
|
|
|
//
|
|
|
// string line;
|
|
|
// while(1)
|
|
|
// {
|
|
|
// lock_guard<mutex> lg(mtx);
|
|
|
// getline(fp,line);
|
|
|
// if(getline(fp,line))
|
|
|
// break;
|
|
|
// int l = line.length();
|
|
|
// Trie* current = trie;
|
|
|
// for(int i = 0;i < l;i++)
|
|
|
// {
|
|
|
// if('a'<=line[i] && line[i]<='z')
|
|
|
// {
|
|
|
// current = addWord(current,line[i]-'a');
|
|
|
// }
|
|
|
// else if('A'<=line[i] && line[i]<='Z')
|
|
|
// {
|
|
|
// current = addWord(current,line[i]-'A');
|
|
|
// }
|
|
|
// else
|
|
|
// {
|
|
|
// if(current!=trie)
|
|
|
// current->wordcount++;
|
|
|
// current = trie;
|
|
|
// }
|
|
|
// }
|
|
|
// }
|
|
|
//}
|
|
|
//int main()
|
|
|
//{
|
|
|
// auto begin = std::chrono::high_resolution_clock::now();
|
|
|
//
|
|
|
// thread t1(buildTrie);
|
|
|
// thread t2(buildTrie);
|
|
|
// thread t3(buildTrie);
|
|
|
// thread t4(buildTrie);
|
|
|
//
|
|
|
// t1.join();
|
|
|
// t2.join();
|
|
|
// t3.join();
|
|
|
// t4.join();
|
|
|
//
|
|
|
// showWord(trie,"");
|
|
|
//
|
|
|
// sort(ranking.begin(),ranking.end(),cmp);
|
|
|
//
|
|
|
// if(ranking.size()<100)
|
|
|
// for(auto it:ranking)
|
|
|
// cout<<it.first<<" "<<it.second<<endl;
|
|
|
// else
|
|
|
// for(auto it = ranking.begin();it!=ranking.begin()+100;it++)
|
|
|
// cout<<it->first<<" "<<it->second<<endl;
|
|
|
//
|
|
|
// auto end = std::chrono::high_resolution_clock::now();
|
|
|
// auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
|
|
|
// printf("Time measured: %.5f seconds.\n", elapsed.count() * 1e-9);
|
|
|
// return 0;
|
|
|
//}
|
|
|
//
|
|
|
//
|
|
|
//
|
|
|
|
|
|
//#include <iostream>
|
|
|
//#include <fstream>
|
|
|
//#include <vector>
|
|
|
//#include <algorithm>
|
|
|
//#include <chrono>
|
|
|
//#include <thread>
|
|
|
//#include <mutex>
|
|
|
//
|
|
|
//using namespace std;
|
|
|
//
|
|
|
//// ... (Trie类的定义保持不变)
|
|
|
//
|
|
|
//class Trie
|
|
|
// {
|
|
|
// public:
|
|
|
// Trie()
|
|
|
// {
|
|
|
// children = NULL;
|
|
|
// sibling =NULL;
|
|
|
// wordsite = 28;
|
|
|
// wordcount = 0;
|
|
|
// }
|
|
|
// Trie(int site,int cnt)
|
|
|
// {
|
|
|
// children = NULL;
|
|
|
// sibling = NULL;
|
|
|
// wordsite = site;
|
|
|
// wordcount = cnt;
|
|
|
// }
|
|
|
// Trie* children;
|
|
|
// Trie* sibling;
|
|
|
// int wordsite;
|
|
|
// int wordcount;
|
|
|
// }*trie;
|
|
|
//mutex trieMutex;
|
|
|
//
|
|
|
//void processLine(Trie* root, const string& line, int offset)
|
|
|
//{
|
|
|
// int l = line.length();
|
|
|
// Trie* current = root;
|
|
|
// for (int i = 0; i < l; i++)
|
|
|
// {
|
|
|
// char ch = line[i];
|
|
|
// int site;
|
|
|
// if ('a' <= ch && ch <= 'z')
|
|
|
// {
|
|
|
// site = ch - 'a';
|
|
|
// }
|
|
|
// else if ('A' <= ch && ch <= 'Z')
|
|
|
// {
|
|
|
// site = ch - 'A';
|
|
|
// }
|
|
|
// else
|
|
|
// {
|
|
|
// lock_guard<mutex> guard(trieMutex); // 锁定以保护Trie树的访问
|
|
|
// if (current != root)
|
|
|
// current->wordcount++;
|
|
|
// current = root;
|
|
|
// continue;
|
|
|
// }
|
|
|
//
|
|
|
// // 锁定以保护Trie树的访问
|
|
|
// lock_guard<mutex> guard(trieMutex);
|
|
|
// current = addWord(current, site);
|
|
|
// }
|
|
|
//}
|
|
|
//
|
|
|
//void showWordThreadSafe(Trie* root, string w, vector<pair<string, int>>& localRanking, mutex& rankingMutex)
|
|
|
//{
|
|
|
// // ... (showWord函数的逻辑保持不变,但需要锁定ranking向量)
|
|
|
//
|
|
|
// if(root->wordcount)
|
|
|
// ranking.push_back(make_pair(w,root->wordcount));
|
|
|
// lock_guard<mutex> guard(rankingMutex);
|
|
|
// for (auto& p : localRanking)
|
|
|
// {
|
|
|
// ranking.push_back(p);
|
|
|
// }
|
|
|
//}
|
|
|
//
|
|
|
//int main()
|
|
|
//{
|
|
|
// // ... (trie初始化代码保持不变)
|
|
|
//
|
|
|
// ifstream fp("in.txt", ios::in);
|
|
|
// vector<thread> threads;
|
|
|
// vector<pair<string, int>> perThreadRanking;
|
|
|
// mutex rankingMutex;
|
|
|
//
|
|
|
// int numThreads = thread::hardware_concurrency(); // 获取硬件支持的线程数
|
|
|
// int linesPerThread = fp.lines() / numThreads; // 假设我们可以这样获取行数
|
|
|
//
|
|
|
// for (int i = 0; i < numThreads; ++i)
|
|
|
// {
|
|
|
// threads.emplace_back([=, &perThreadRanking]() {
|
|
|
// string line;
|
|
|
// int lineCount = 0;
|
|
|
// while (getline(fp, line) && lineCount < linesPerThread)
|
|
|
// {
|
|
|
// processLine(trie, line, i * linesPerThread + lineCount);
|
|
|
// lineCount++;
|
|
|
// }
|
|
|
// showWordThreadSafe(trie, "", perThreadRanking, rankingMutex);
|
|
|
// });
|
|
|
// }
|
|
|
//
|
|
|
// for_each(threads.begin(), threads.end(), mem_fn(&thread::join));
|
|
|
//
|
|
|
// // ... (排名和输出代码保持不变)
|
|
|
//
|
|
|
// return 0;
|
|
|
//}
|