add api(/users/job_names)

master
ourEmpire 6 years ago
parent 6c9ae28777
commit ce47ac2ae2

2
server/.gitignore vendored

@ -1,2 +0,0 @@
.gradle/
.idea/

@ -6,16 +6,17 @@ import org.springframework.boot.gradle.tasks.bundling.BootWar
plugins {
java
war
kotlin("jvm") version "1.2.71"
kotlin("plugin.allopen") version "1.2.71"
kotlin("plugin.jpa") version "1.2.71"
kotlin("plugin.spring") version "1.2.71"
kotlin("jvm") version "1.3.50"
kotlin("plugin.allopen") version "1.3.50"
kotlin("plugin.jpa") version "1.3.50"
kotlin("plugin.spring") version "1.3.50"
id("org.springframework.boot") version "2.1.7.RELEASE" apply false
id("io.spring.dependency-management") version "1.0.8.RELEASE"
}
buildscript {
repositories {
var kotlinVersion = "1.3.50"
repositories {
mavenCentral()
}
dependencies {
@ -65,8 +66,10 @@ dependencies {
implementation("org.apache.httpcomponents:httpclient:4.5.9")
implementation("org.apache.httpcomponents:httpcore:4.4.11")
implementation("org.jsoup:jsoup:1.12.1")
implementation("top.ourfor:lib:+")
implementation("org.springframework.boot:spring-boot-starter-data-mongodb")
implementation("com.hankcs:hanlp:portable-1.7.5")
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.3.2")
}
allOpen {
@ -97,7 +100,7 @@ tasks.withType<KotlinCompile> {
}
val war: War by tasks
war.archiveName = "job.war"
war.archiveName = "api.rest.war"
tasks.getByName<War>("war") {
enabled = true

@ -3,18 +3,20 @@ package config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.HashMap;
@Component
@ConfigurationProperties(prefix="crawler")
public class CrawlerConfig {
private HashMap<String,String> job51;
private ArrayList<String> douban;
public CrawlerConfig() {
}
public CrawlerConfig(HashMap<String, String> job51) {
public CrawlerConfig(HashMap<String, String> job51,ArrayList<String> douban) {
this.job51 = job51;
this.douban = douban;
}
public HashMap<String, String> getJob51() {
@ -25,5 +27,11 @@ public class CrawlerConfig {
this.job51 = job51;
}
public ArrayList<String> getDouban() {
return douban;
}
public void setDouban(ArrayList<String> douban) {
this.douban = douban;
}
}

@ -0,0 +1,20 @@
package config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
/**
 * Configuration holder bound to properties under the {@code job} prefix.
 */
@Component
@ConfigurationProperties(prefix = "job")
public class JobConfig {

    /** Job names read from the {@code job.jobNames} property. */
    private ArrayList<String> jobNames;

    /**
     * @return the list currently held, or {@code null} if nothing has been set
     */
    public ArrayList<String> getJobNames() {
        return this.jobNames;
    }

    /**
     * @param jobNames list to store; the reference is kept as-is, not copied
     */
    public void setJobNames(ArrayList<String> jobNames) {
        this.jobNames = jobNames;
    }
}

@ -73,5 +73,4 @@ public class AdminCtl {
}
}

@ -47,11 +47,24 @@ public class CommentCtl implements Ctl {
//将不良评论和谐掉
private String harmony(String content){
public String harmony(String content){
for(String word : words.getWords()){
content = content.replaceAll(word,"**");
int index = content.indexOf(word);
while (index > 0) {
StringBuffer buffer = new StringBuffer(content.substring(0, index));
String right = content.substring(index + word.length());
char[] chars = new char[word.length()];
for (int i = 0; i < word.length(); i++) {
chars[i] = '*';
}
content = new String(buffer.append(chars).append(right));
index = content.indexOf(word);
}
}
return content;
}
}

@ -76,9 +76,8 @@ public class EmailCtl implements Ctl {
/**
 * Checks a verification code submitted for an email address.
 *
 * @param email  key used to look up the stored entry in {@code repos}
 * @param verify code supplied by the caller
 * @return {@code true} only when an entry exists for {@code email}, its code equals
 *         {@code verify}, and fewer than {@code MAX_LIVE} milliseconds have passed
 *         since its {@code timeStamp}
 */
public boolean check(String email,int verify){
    Repo repo = repos.get(email);
    // No entry for this address (presumably no code was ever issued for it):
    // report failure instead of dereferencing null.
    if (repo == null) {
        return false;
    }
    long elapsed = System.currentTimeMillis() - repo.timeStamp;
    return repo.verify == verify && elapsed < MAX_LIVE;
}

@ -11,6 +11,8 @@ import message.StateCode;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.*;
import java.util.Random;
@RequestMapping("/users")
@RestController
public class UserCtl implements Ctl{
@ -52,12 +54,11 @@ public class UserCtl implements Ctl{
}
@PostMapping("/create")
public Message create(@RequestParam String id,
@RequestParam String username,
public Message create(@RequestParam String username,
@RequestParam String password,
@RequestParam String email,
@RequestParam(required = false) String phone,
@RequestParam String sex,
@RequestParam(required = false) String sex,
@RequestParam Integer verify) {
Message message = messages[Messages.POST];
EmailCtl emailCtl = new EmailCtl();
@ -66,12 +67,11 @@ public class UserCtl implements Ctl{
.setMsg("验证码失效或错误!")
.setData(null);
}
//查找数据库中是否有id的用户
if(us.getUserMap().get(id) != null){
return message.setMsg("the user "+id+"is existed")
.setCode(StateCode.FORBIDDEN)
.setData(null);
String id;
while(true) {
//产生一个9位的随机数作为id
id = String.valueOf(new Random().nextInt(1000000000 - 100000000) + 100000000);
if(us.getUserMap().get(id) == null)break;
}
User newUser = new User(id,username,password,email,phone,sex);
us.getUserMap().put(id,newUser);

@ -1,6 +1,7 @@
package controller;
import com.alibaba.fastjson.JSON;
import config.JobConfig;
import database.*;
import message.Messages;
import message.Messages.Message;
@ -25,12 +26,14 @@ public class UserOpCtl {
BookRepo br;
@Autowired
NodeRepo nr;
@Autowired
JobConfig jobConfig;
private Message[] messages = new Messages().getMessages();
@GetMapping("/jc")
public String viewJobAndCompany(@RequestParam String jobName){
public String viewCompany(@RequestParam String jobName){
Message message = messages[Messages.GET];
//存储返回的页面信息
@ -75,7 +78,13 @@ public class UserOpCtl {
}
}
/**
 * GET handler mapped to {@code /job_names}.
 *
 * @return the GET message from {@code messages}, filled with the OK state code,
 *         a fixed status text and the job names held by {@code jobConfig}
 */
@GetMapping("/job_names")
public Message getAllJobName(){
    return messages[Messages.GET]
            .setCode(StateCode.OK)
            .setMsg("option successfully")
            .setData(jobConfig.getJobNames());
}
@GetMapping("/location")
public Message getLocate() {

@ -0,0 +1,58 @@
package database;
import javax.persistence.Entity;
import javax.persistence.Id;
import java.util.ArrayList;
/**
 * Persistent record holding the proficiency and technology keyword lists for one type.
 */
@Entity
public class Bayesian {

    /** Identifier; reuses the company's {@code type} value so records are easy to look up. */
    @Id
    private String type;

    /** Proficiency categories. */
    private ArrayList<String> proficiencies;

    /** Technologies. */
    private ArrayList<String> technologies;

    /** No-argument constructor; leaves every field {@code null}. */
    public Bayesian() {
    }

    /**
     * @param type          identifier of the record
     * @param proficiencies proficiency categories
     * @param technologies  technologies
     */
    public Bayesian(String type, ArrayList<String> proficiencies, ArrayList<String> technologies) {
        this.type = type;
        this.proficiencies = proficiencies;
        this.technologies = technologies;
    }

    public String getType() {
        return this.type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public ArrayList<String> getProficiencies() {
        return this.proficiencies;
    }

    public void setProficiencies(ArrayList<String> proficiencies) {
        this.proficiencies = proficiencies;
    }

    public ArrayList<String> getTechnologies() {
        return this.technologies;
    }

    public void setTechnologies(ArrayList<String> technologies) {
        this.technologies = technologies;
    }

    /** Renders all three fields in {@code Bayesian{...}} form. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Bayesian{");
        text.append("type='").append(type).append('\'');
        text.append(", proficiencies=").append(proficiencies);
        text.append(", technologies=").append(technologies);
        text.append('}');
        return text.toString();
    }
}

@ -0,0 +1,10 @@
package database;
import org.springframework.data.mongodb.repository.MongoRepository;
import java.util.ArrayList;
/**
 * MongoDB repository for {@link Bayesian} records keyed by a {@code String} id.
 */
public interface BayesianRepo extends MongoRepository<Bayesian,String> {
/** Returns every stored {@link Bayesian} record. */
ArrayList<Bayesian> findAll();
/**
 * Finder by the {@code type} property.
 * NOTE(review): presumably resolved by Spring Data's derived-query naming; confirm.
 */
ArrayList<Bayesian> findAllByType(String type);
}

@ -0,0 +1,36 @@
package database;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Repository;
import java.util.ArrayList;
import java.util.HashMap;
/**
 * In-memory map of {@link Bayesian} records keyed by their type, filled from
 * {@link BayesianRepo} when {@link #init()} is called.
 */
@Repository
public class BayesianSet {

    @Autowired
    private BayesianRepo bayesianRepo;

    /** Records indexed by {@link Bayesian#getType()}. */
    private HashMap<String,Bayesian> bayesianHashMap = new HashMap<>();

    /** Creates a set backed by an empty map. */
    public BayesianSet() {
    }

    /**
     * @param bayesianHashMap map to use as backing store (kept by reference)
     */
    public BayesianSet(HashMap<String, Bayesian> bayesianHashMap) {
        this.bayesianHashMap = bayesianHashMap;
    }

    public HashMap<String, Bayesian> getBayesianHashMap() {
        return this.bayesianHashMap;
    }

    public void setBayesianHashMap(HashMap<String, Bayesian> bayesianHashMap) {
        this.bayesianHashMap = bayesianHashMap;
    }

    /**
     * Reads every record from the repository and stores it under its type;
     * an existing entry with the same key is overwritten.
     */
    public void init(){
        bayesianRepo.findAll().forEach(record -> bayesianHashMap.put(record.getType(), record));
    }
}

@ -8,15 +8,21 @@ public class Book {
@Id
private String id;
private String name;
private String url;
private String author;
private String bookUrl; //书详细的介绍的url
private String info; //书的简介
private String coverUrl;//书封面的url
public Book() {
}
public Book(String id,String name, String url) {
public Book(String id, String name, String author,String bookUrl, String info, String coverUrl) {
this.id = id;
this.name = name;
this.url = url;
this.author = author;
this.bookUrl = bookUrl;
this.info = info;
this.coverUrl = coverUrl;
}
public String getId() {
@ -35,12 +41,36 @@ public class Book {
this.name = name;
}
public String getUrl() {
return url;
public String getAuthor() {
return author;
}
public void setUrl(String url) {
this.url = url;
public void setAuthor(String author) {
this.author = author;
}
public String getBookUrl() {
return bookUrl;
}
public void setBookUrl(String bookUrl) {
this.bookUrl = bookUrl;
}
public String getInfo() {
return info;
}
public void setInfo(String info) {
this.info = info;
}
public String getCoverUrl() {
return coverUrl;
}
public void setCoverUrl(String coverUrl) {
this.coverUrl = coverUrl;
}
@Override
@ -48,7 +78,10 @@ public class Book {
return "Book{" +
"id='" + id + '\'' +
", name='" + name + '\'' +
", url='" + url + '\'' +
", author='" + author + '\'' +
", bookUrl='" + bookUrl + '\'' +
", info='" + info + '\'' +
", coverUrl='" + coverUrl + '\'' +
'}';
}
}

@ -7,4 +7,5 @@ import java.util.ArrayList;
/**
 * MongoDB repository for {@link Job} documents keyed by a {@code String} id.
 * NOTE(review): the finder methods are presumably resolved by Spring Data's
 * derived-query naming; confirm.
 */
public interface JobRepo extends MongoRepository<Job,String> {
/** Finder by {@code id}. */
ArrayList<Job> findAllById(String id);
/** Returns every stored {@link Job}. */
ArrayList<Job> findAll();
/** Finder by the {@code jobName} property. */
ArrayList<Job> findAllByJobName(String jobName);
}

@ -35,15 +35,13 @@ public class JWTUtil {
/**
* token
* @param token
* @param id id
* @return
*/
public boolean verify(String token,String id){
public boolean verify(String token){
try {
Algorithm algorithm = Algorithm.HMAC256(secret);
JWTVerifier verifier = JWT.require(algorithm)
.withClaim("id", id)
.build();
verifier.verify(token);
return true;

@ -25,8 +25,9 @@ public class Timer {
@Autowired
private CrawlerConfig crawlerConfig;
//每隔1h将缓存数据写入数据库
@Scheduled(initialDelay = 1000 * 60 * 60,fixedDelay = 1000 * 60 * 60)
@Scheduled(initialDelay = 1000 * 60 * 60L,fixedRate = 1000* 60 * 60L)
public void UserInfoUpdate(){
ArrayList<User> userList = us.getUserList();
userList.clear();
@ -41,6 +42,8 @@ public class Timer {
deleteByIdList.clear();
}
@Scheduled(cron = "0 0 3 * * ?")
public void CompanyInfoUpdate(){
HashMap<String,String> job51 = crawlerConfig.getJob51();

@ -1,30 +1,26 @@
package init;
import config.CrawlerConfig;
import database.*;
import helper.Timer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;
import tool.JobCrawler;
import tool.JobInfoHandle;
@Order(1)
@Component
public class Start implements CommandLineRunner {
@Autowired
UserSet us;
@Autowired
CompanyRepo cr;
@Autowired
JobRepo jr;
private UserSet us;
//@Autowired
//private BayesianSet bs;
@Autowired
JobCrawler crawler;
@Autowired
CrawlerConfig config;
JobInfoHandle jih;
@Override
public void run(String... args){
public void run(String... args) {
us.init();
//bs.init();
jih.test();
}
}

@ -0,0 +1,75 @@
package tool;
/**
* id
*
*/
import database.Book;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Crawls Douban tag listing pages ({@code https://book.douban.com/tag/<type>})
 * and turns every listed book into a {@link Book}.
 */
@Component
public class BookCrawler {

    /**
     * Step of the {@code start} query parameter between consecutive listing pages.
     * NOTE(review): 20 items per page is assumed; confirm against the live site.
     */
    private static final int PAGE_SIZE = 20;

    /** Pause between two page requests, in milliseconds. */
    private static final long PAGE_DELAY_MS = 1500L;

    /** Captures the numeric id in URLs such as {@code https://book.douban.com/subject/25985021/}. */
    private static final Pattern SUBJECT_ID = Pattern.compile("/subject/(\\d+)");

    /** Fallback for URLs without a {@code /subject/} segment: strip everything but digits. */
    private static final Pattern NON_DIGIT = Pattern.compile("[^0-9]");

    /**
     * Downloads the first {@code pages} listing pages of a tag and parses every book on them.
     *
     * @param type  tag name placed in the URL path
     * @param pages number of listing pages to fetch, starting from the first
     * @return the parsed books in page order; if the thread is interrupted while
     *         waiting between pages, the books collected so far
     * @throws IOException if a page cannot be fetched
     */
    public ArrayList<Book> getBooks(String type,int pages) throws IOException {
        ArrayList<Book> books = new ArrayList<>();
        for (int page = 0; page < pages; page++) {
            // Throttle between requests; parsing a page needs no delay.
            if (page > 0 && !pauseBetweenPages()) {
                break;
            }
            Document doc = Jsoup.connect(jointUrl(type, page)).get();
            doc.outputSettings().charset("UTF-8");
            // Select the individual items, not the list container; otherwise only
            // the first book of every page is ever read.
            Elements items = doc.select(".clearfix .article #subject_list .subject-list .subject-item");
            if (items.isEmpty()) {
                continue;
            }
            System.out.println("elements length: " + items.size());
            for (Element item : items) {
                // attr() yields "" (never null) when nothing matches.
                String bookUrl = item.select(".pic .nbg").attr("href");
                if (bookUrl.isEmpty()) {
                    continue;
                }
                String id = getId(bookUrl);
                String name = item.select(".info h2>a").attr("title");
                String coverUrl = item.select(".pic .nbg img").attr("src");
                String author = item.select(".info .pub").text();
                String info = item.select(".info>p").text();
                Book book = new Book(id, name, author, bookUrl, info, coverUrl);
                System.out.println("book: " + book);
                books.add(book);
            }
        }
        return books;
    }

    /**
     * Sleeps for {@link #PAGE_DELAY_MS}.
     *
     * @return {@code false} if the thread was interrupted (the interrupt flag is restored)
     */
    private boolean pauseBetweenPages() {
        try {
            Thread.sleep(PAGE_DELAY_MS);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Builds the listing URL for a tag.
     *
     * @param type  tag name
     * @param index zero-based page index; converted to an item offset for {@code start}
     */
    private String jointUrl(String type,int index){
        return "https://book.douban.com/tag/" + type
                + "?start=" + (index * PAGE_SIZE)
                + "&type=T";
    }

    /**
     * Extracts the book id from its detail URL, e.g.
     * {@code https://book.douban.com/subject/25985021/} gives {@code 25985021}.
     */
    private String getId(String bookUrl){
        Matcher subject = SUBJECT_ID.matcher(bookUrl);
        if (subject.find()) {
            return subject.group(1);
        }
        return NON_DIGIT.matcher(bookUrl).replaceAll("").trim();
    }
}

@ -0,0 +1,45 @@
package tool;
import com.alibaba.fastjson.JSON
import kotlinx.coroutines.*
import org.jsoup.Jsoup
import java.io.*
/**
 * Fetches 50 listing pages of the Douban "程序" tag (offsets 0, 20, ..., 980),
 * extracts name, author, cover and description of every listed book, and writes
 * the whole list as JSON to a fixed file.
 */
suspend fun main() = coroutineScope {
    val userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36"
    val books = ArrayList<Book>()
    for (i in 0..49) {
        val url = "https://book.douban.com/tag/程序?start=${20*i}&type=T"
        val doc = Jsoup.connect(url).header("User-Agent",userAgent).get()
        // Parse inline: launching a coroutine per item made concurrent add() calls on a
        // plain ArrayList and let write() below run before all items had been added.
        for (li in doc.select("li.subject-item")) {
            val name = li.select(".info>h2>a").attr("title")
            val author = li.select(".info>.pub").text()
            val pic = li.select(".pic>.nbg>img").attr("src")
            val desc = li.select(".info>p").text()
            books.add(Book(name,author,desc,pic))
            println("name: $name author: $author pic: $pic desc: $desc")
        }
    }
    write(File("E:\\repo\\data\\json\\book\\program.json"), JSON.toJSONString(books),"UTF-8")
}
/**
 * Writes [content] to [file], replacing any previous content.
 *
 * @param file target file; created if missing, truncated otherwise
 * @param content text to write
 * @param encoding charset name, e.g. "UTF-8"
 * @throws IOException if the file cannot be opened or written, or the encoding is unsupported
 */
@Throws(IOException::class)
fun write(file: File, content: String, encoding: String) {
    // FileOutputStream already creates/truncates the file; use {} closes the
    // writer even when write() throws.
    BufferedWriter(OutputStreamWriter(FileOutputStream(file), encoding)).use { writer ->
        writer.write(content)
    }
}
/**
 * One book entry scraped from a listing page.
 *
 * @property name title of the book
 * @property author author/publication text of the entry
 * @property desc short description text
 * @property pic URL of the cover image
 */
data class Book(
val name: String,
val author: String,
val desc: String,
val pic: String
)

@ -9,10 +9,8 @@ import database.Job;
import org.jsoup.nodes.*;
import org.jsoup.*;
import org.jsoup.select.Elements;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
@RestController
@RequestMapping("/users/test")
import org.springframework.stereotype.Component;
@Component
public class JobCrawler {
public ArrayList<Company> getCompanies(String url,String type)throws Exception {
ArrayList<Company> companies = new ArrayList<>();
@ -20,7 +18,7 @@ public class JobCrawler {
doc.outputSettings().charset("UTF-8");
Elements elements = doc.select(".dw_table .el");
elements.remove(0);
elements.first().text();
elements.first().text();//抛空指针异常,控制页面不在爬取
for (Element element : elements) {
Elements els = element.select(".t1>span>a[title]");
String jobName = els.attr("title");

@ -0,0 +1,46 @@
package tool;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import database.Bayesian;
import database.BayesianSet;
import database.JobRepo;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
/**
 * Scans job description text for the proficiency keywords stored in a
 * {@link Bayesian} record. Work in progress: matches are located but not recorded.
 */
@Component
public class JobInfoHandle {
    @Autowired
    JobRepo jobRepo;
    @Autowired
    BayesianSet bayesianSet;

    /** Placeholder with an empty body. */
    public void test(){
    }

    /**
     * Walks over every occurrence of each proficiency keyword of {@code type}
     * inside {@code jobInfo}. Returns silently when {@code jobInfo} is null or no
     * record (or no proficiency list) exists for {@code type}.
     *
     * @param jobInfo job description text to scan
     * @param type    key of the {@link Bayesian} record in {@code bayesianSet}
     */
    public void as(String jobInfo,String type){
        Bayesian bayesian = bayesianSet.getBayesianHashMap().get(type);
        if (jobInfo == null || bayesian == null) {
            return;
        }
        ArrayList<String> proficiencies = bayesian.getProficiencies();
        if (proficiencies == null) {
            return;
        }
        for (String proficiency : proficiencies) {
            // An empty keyword matches everywhere and carries no information.
            if (proficiency == null || proficiency.isEmpty()) {
                continue;
            }
            int index = jobInfo.indexOf(proficiency);
            while (index >= 0) {
                // TODO: record the match at `index`.
                // Continue after this match; searching from 0 again never terminates.
                index = jobInfo.indexOf(proficiency, index + proficiency.length());
            }
        }
    }
}

@ -26,11 +26,10 @@
"children": [
{"name": "javaScript DOM 编程艺术", "value": 1801050,
"children":[
{"name": "腾讯网UED体验设计之旅","value": 1445661,
"name": "形式感+:网页视觉设计创意拓展与快速表现","value": 2178611,
"name": "在你身边,为你设计:腾讯的用户体验设计之道","value": 25863515,
"name": "HTML与CSS进阶教程","value": 25752357,
}
{"name": "腾讯网UED体验设计之旅","value": 1445661},
{"name": "形式感+:网页视觉设计创意拓展与快速表现","value": 2178611},
{"name": "在你身边,为你设计:腾讯的用户体验设计之道","value": 25863515},
{"name": "HTML与CSS进阶教程","value": 25752357}
]
}
]

@ -55,14 +55,51 @@ crawler:
"UI_designer": "UI%25E8%25AE%25BE%25E8%25AE%25A1%25E5%25B8%2588",
"software_engineer": "%25E8%25BD%25AF%25E4%25BB%25B6%25E5%25B7%25A5%25E7%25A8%258B%25E5%25B8%2588",
"software_tester": "%25E8%25BD%25AF%25E4%25BB%25B6%25E6%25B5%258B%25E8%25AF%2595%25E5%25B7%25A5%25E7%25A8%258B%25E5%25B8%2588"
}
}
douban:
- "科普"
- "互联网"
- "编程"
- "科学"
- "交互设计"
- "用户体验"
- "算法"
- "科技"
- "web"
- "交互"
- "通信"
- "UE"
- "UCD"
- "神经网络"
- "程序"
job:
jobNames:
- "软件工程师"
- "UI设计师"
- "产品经理"
- "项目经理"
- "软件测试师"
ai:
proficiency:
- "了解"
- "具备"
- "熟悉"
- "熟练使用"
- "掌握"
- "精通"
technology:
- "HTML"
- "JavaScript"
- "软件测试技术"
- "Quartz"
comment:
words:
- "mmp"
- "操你"
- "操你"
- "尼玛"
- "艹"
- "他妈的"

@ -0,0 +1,6 @@
import java.util.*
/** Prints one pseudo-random number in 100000000..999999999, without a trailing newline. */
fun main() {
    val span = 1000000000 - 100000000
    val candidate = Random().nextInt(span) + 100000000
    print(candidate.toString())
}

@ -0,0 +1,9 @@
import java.util.Random;
/** Scratch program that prints one pseudo-random nine-digit id. */
public class Test {

    /**
     * Draws a value in [100000000, 999999999] and prints it followed by a line break.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Random generator = new Random();
        int offset = generator.nextInt(1000000000 - 100000000);
        String id = String.valueOf(offset + 100000000);
        System.out.println(id);
    }
}
Loading…
Cancel
Save