parent
6c9ae28777
commit
ce47ac2ae2
@ -1,2 +0,0 @@
|
||||
.gradle/
|
||||
.idea/
|
||||
@ -0,0 +1,20 @@
|
||||
package config;
|
||||
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
@Component
|
||||
@ConfigurationProperties(prefix="job")
|
||||
public class JobConfig {
|
||||
private ArrayList<String> jobNames;
|
||||
|
||||
public ArrayList<String> getJobNames() {
|
||||
return jobNames;
|
||||
}
|
||||
|
||||
public void setJobNames(ArrayList<String> jobNames) {
|
||||
this.jobNames = jobNames;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,58 @@
|
||||
package database;
|
||||
|
||||
import javax.persistence.Entity;
|
||||
import javax.persistence.Id;
|
||||
import java.util.ArrayList;
|
||||
|
||||
@Entity
|
||||
public class Bayesian {
|
||||
@Id
|
||||
//以company中的type作为id便于搜索
|
||||
private String type;
|
||||
//存储熟练度的分类
|
||||
private ArrayList<String> proficiencies;
|
||||
//存储技术
|
||||
private ArrayList<String> technologies;
|
||||
|
||||
public Bayesian() {
|
||||
}
|
||||
|
||||
public Bayesian(String type, ArrayList<String> proficiencies, ArrayList<String> technologies) {
|
||||
this.type = type;
|
||||
this.proficiencies = proficiencies;
|
||||
this.technologies = technologies;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public ArrayList<String> getProficiencies() {
|
||||
return proficiencies;
|
||||
}
|
||||
|
||||
public void setProficiencies(ArrayList<String> proficiencies) {
|
||||
this.proficiencies = proficiencies;
|
||||
}
|
||||
|
||||
public ArrayList<String> getTechnologies() {
|
||||
return technologies;
|
||||
}
|
||||
|
||||
public void setTechnologies(ArrayList<String> technologies) {
|
||||
this.technologies = technologies;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Bayesian{" +
|
||||
"type='" + type + '\'' +
|
||||
", proficiencies=" + proficiencies +
|
||||
", technologies=" + technologies +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
package database;
|
||||
|
||||
import org.springframework.data.mongodb.repository.MongoRepository;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
public interface BayesianRepo extends MongoRepository<Bayesian,String> {
|
||||
ArrayList<Bayesian> findAll();
|
||||
ArrayList<Bayesian> findAllByType(String type);
|
||||
}
|
||||
@ -0,0 +1,36 @@
|
||||
package database;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
||||
@Repository
|
||||
public class BayesianSet {
|
||||
@Autowired
|
||||
private BayesianRepo bayesianRepo;
|
||||
private HashMap<String,Bayesian> bayesianHashMap = new HashMap<>();
|
||||
|
||||
public BayesianSet() {
|
||||
}
|
||||
|
||||
public BayesianSet(HashMap<String, Bayesian> bayesianHashMap) {
|
||||
this.bayesianHashMap = bayesianHashMap;
|
||||
}
|
||||
|
||||
public HashMap<String, Bayesian> getBayesianHashMap() {
|
||||
return bayesianHashMap;
|
||||
}
|
||||
|
||||
public void setBayesianHashMap(HashMap<String, Bayesian> bayesianHashMap) {
|
||||
this.bayesianHashMap = bayesianHashMap;
|
||||
}
|
||||
|
||||
public void init(){
|
||||
for(Bayesian bayesian: bayesianRepo.findAll()){
|
||||
bayesianHashMap.put(bayesian.getType(),bayesian);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,30 +1,26 @@
|
||||
package init;
|
||||
|
||||
import config.CrawlerConfig;
|
||||
import database.*;
|
||||
import helper.Timer;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.CommandLineRunner;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.stereotype.Component;
|
||||
import tool.JobCrawler;
|
||||
import tool.JobInfoHandle;
|
||||
|
||||
|
||||
@Order(1)
|
||||
@Component
|
||||
public class Start implements CommandLineRunner {
|
||||
@Autowired
|
||||
UserSet us;
|
||||
@Autowired
|
||||
CompanyRepo cr;
|
||||
@Autowired
|
||||
JobRepo jr;
|
||||
private UserSet us;
|
||||
//@Autowired
|
||||
//private BayesianSet bs;
|
||||
@Autowired
|
||||
JobCrawler crawler;
|
||||
@Autowired
|
||||
CrawlerConfig config;
|
||||
|
||||
JobInfoHandle jih;
|
||||
@Override
|
||||
public void run(String... args){
|
||||
public void run(String... args) {
|
||||
us.init();
|
||||
//bs.init();
|
||||
jih.test();
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,45 @@
|
||||
package tool;
|
||||
import com.alibaba.fastjson.JSON
|
||||
import kotlinx.coroutines.*
|
||||
import org.jsoup.Jsoup
|
||||
import java.io.*
|
||||
|
||||
/**
 * Crawls 50 result pages of the Douban "程序" book tag, extracts one [Book] per
 * `li.subject-item` element and writes all of them as a JSON array to disk.
 *
 * Each list item is parsed in its own coroutine. The results are collected with
 * `awaitAll()`, so the output file is written only after every parser has
 * finished, and no mutable list is shared between coroutines.
 */
suspend fun main() = coroutineScope {

    val userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36"
    // One Deferred per list item; kept in page order so the output order is stable.
    val pending = ArrayList<Deferred<Book>>()

    for (i in 0..49) {
        val url = "https://book.douban.com/tag/程序?start=${20*i}&type=T"
        val doc = Jsoup.connect(url).header("User-Agent", userAgent).get()
        val lis = doc.select("li.subject-item")

        for (li in lis) {
            pending.add(async {
                val name = li.select(".info>h2>a").attr("title")
                val author = li.select(".info>.pub").text()
                val pic = li.select(".pic>.nbg>img").attr("src")
                val desc = li.select(".info>p").text()
                println("name: $name author: $author pic: $pic desc: $desc")
                Book(name, author, desc, pic)
            })
        }
    }

    // Suspends until every parser coroutine has produced its Book.
    val books = pending.awaitAll()
    write(File("E:\\repo\\data\\json\\book\\program.json"), JSON.toJSONString(books), "UTF-8")
}
|
||||
|
||||
/**
 * Writes [content] to [file] using the character set named by [encoding],
 * replacing any previous content of the file.
 *
 * The output stream is closed even when writing fails. `FileOutputStream`
 * creates the file when it is missing and truncates it otherwise, so no
 * separate delete/create step is needed.
 *
 * @param file destination file
 * @param content text to write
 * @param encoding name of the character set, e.g. `"UTF-8"`
 * @throws IOException if the file cannot be opened or written, or the encoding
 * name is not supported
 */
@Throws(IOException::class)
fun write(file: File, content: String, encoding: String) {
    FileOutputStream(file).use { stream ->
        val writer = BufferedWriter(OutputStreamWriter(stream, encoding))
        writer.write(content)
        // Push buffered characters to the stream before `use` closes it.
        writer.flush()
    }
}
|
||||
|
||||
/**
 * One book entry extracted from a listing page.
 *
 * @property name title of the book
 * @property author text of the publication line of the entry
 * @property desc description text of the entry
 * @property pic URL of the cover image
 */
data class Book(
    val name: String,
    val author: String,
    val desc: String,
    val pic: String,
)
|
||||
@ -0,0 +1,46 @@
|
||||
package tool;
|
||||
|
||||
import com.hankcs.hanlp.HanLP;
|
||||
import com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie;
|
||||
import com.hankcs.hanlp.dictionary.CoreDictionary;
|
||||
import com.hankcs.hanlp.dictionary.CustomDictionary;
|
||||
import database.Bayesian;
|
||||
import database.BayesianSet;
|
||||
import database.JobRepo;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
||||
@Component
|
||||
public class JobInfoHandle {
|
||||
@Autowired
|
||||
JobRepo jobRepo;
|
||||
@Autowired
|
||||
BayesianSet bayesianSet;
|
||||
|
||||
|
||||
public void test(){
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void as(String jobInfo,String type){
|
||||
ArrayList<String> proficiencies = bayesianSet.getBayesianHashMap().get(type).getProficiencies();
|
||||
ArrayList<String> technologies = bayesianSet.getBayesianHashMap().get(type).getTechnologies();
|
||||
for(String proficiency: proficiencies) {
|
||||
while(true){
|
||||
int index = jobInfo.indexOf(proficiency);
|
||||
if(index < 0) break;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
@ -0,0 +1,6 @@
|
||||
|
||||
import java.util.*
|
||||
|
||||
/**
 * Prints a random nine-digit number (100000000 inclusive to 1000000000 exclusive)
 * without a trailing newline.
 */
fun main() {
    val lowerBound = 100000000
    val upperBound = 1000000000
    val candidate = Random().nextInt(upperBound - lowerBound) + lowerBound
    print(candidate.toString())
}
|
||||
@ -0,0 +1,9 @@
|
||||
import java.util.Random;
|
||||
|
||||
/**
 * Scratch program that prints a random nine-digit identifier.
 */
public class Test {

    /**
     * Draws a value from 100000000 (inclusive) to 1000000000 (exclusive) and
     * prints it followed by a line break.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        final int lowerBound = 100000000;
        final int upperBound = 1000000000;
        int drawn = new Random().nextInt(upperBound - lowerBound) + lowerBound;
        System.out.println(Integer.toString(drawn));
    }
}
|
||||
Loading…
Reference in new issue