# 目前工商上线处理流程

```plantuml
@startuml
' Diagram of the current IC (工商) data processing flow.
' Colour convention used in this diagram:
'   #red        -> queue elements (topics)
'   #green      -> database elements
'   #lightgreen -> database elements whose names start with "es:"

' ---- Source ----
file spider_log

' ---- Stores grouped under "mongo" ----
package mongo {
  database "ic" #green
  database "partner" #green
  database "employee" #green
  ' "其他分维度表" = other per-dimension tables
  database "其他分维度表" #green
}

' ---- Stores grouped under "mysql" ----
package mysql {
  database ic_base #green
  database ic_biz #green
}

' ---- Topics ----
queue "topic: collie-ic-data-to-mongo" #red
queue "topic: collie-ic-crawler-data" #red
queue "topic: collie-ic-crawler-data-std" #red
queue "topic: ic_spider_all" #red

' ---- Downstream targets ----
cloud hudi
' "企业动态监控" = company activity monitoring
cloud 企业动态监控
database company_contact_detail #green
database "es: company" #lightgreen
database "es: collie-ic-crawler-meta-*" #lightgreen

' ---- Flow: spider_log -> first topic -> ic_spider_data -> mongo stores ----
[spider_log] --> [topic: collie-ic-data-to-mongo]: logstash
' label: consumer, multi-process
[topic: collie-ic-data-to-mongo] --> [udm: ic_spider_data]: consumer多进程
[udm: ic_spider_data] --> [ic]
[udm: ic_spider_data] --> [partner]
[udm: ic_spider_data] --> [employee]
[udm: ic_spider_data] --> [其他分维度表]

' ---- Flow: mongo stores -> hudi ----
[ic] --> [hudi]
[partner] --> [hudi]
[employee] --> [hudi]
[其他分维度表] --> [hudi]

' ---- Flow: other outputs of ic_spider_data ----
[udm: ic_spider_data] --> [企业动态监控]
[udm: ic_spider_data] --> [topic: collie-ic-crawler-data]
' label: tagging done through an API
[udm: ic_spider_data] --> [es: company]: 通过接口打标签

' ---- Flow: consumers of topic collie-ic-crawler-data ----
' label: consumer, single process
[topic: collie-ic-crawler-data] --> [udm: crawler_to_hdfs]: consumer单进程
[topic: collie-ic-crawler-data] --> [企业动态监控]: udm: parse_change_content
[udm: crawler_to_hdfs] --> [topic: collie-ic-crawler-data-std]

' ---- Flow: consumers of topic collie-ic-crawler-data-std ----
[topic: collie-ic-crawler-data-std] --> [udm: update_company_index]: data_pump
[topic: collie-ic-crawler-data-std] --> [es: collie-ic-crawler-meta-*]: logstash
[udm: update_company_index] --> [es: company]
[topic: collie-ic-crawler-data-std] --> [company_contact_detail]: consumer

' ---- Flow: cleaning branch -> ic_spider_all -> mysql stores ----
' label: data_pump, multi-process
[topic: collie-ic-crawler-data] --> [udm: crawler_to_hdfs(1)+cleaning]: data_pump多进程
[udm: crawler_to_hdfs(1)+cleaning] --> [topic: ic_spider_all]
' label: data_pump, multi-process
[topic: ic_spider_all] --> [udm: sync_mysql_new]: data_pump多进程
[udm: sync_mysql_new] --> [ic_base]
[udm: sync_mysql_new] --> [ic_biz]

@enduml
```
```plantuml
@startuml
' Diagram of an IC data flow in which spider_log data is registered and
' cleaned into the ic_ods tables, and changes to those tables are then
' propagated to the ic_mongo and ic_mysql2B stores through topics.

' ---- Stores grouped under "ic_ods" ----
package ic_ods {
  database company_base
  database company_legalperson
  database company_partner
  database company_...
}

' ---- Stores grouped under "ic_mongo" ----
package ic_mongo {
  database ic
  database partner
  database employee
  ' "其他分维度表" = other per-dimension tables
  database "其他分维度表"
}

' ---- Stores grouped under "ic_mysql2B" ----
package ic_mysql2B {
  database ic_base
  database ic_biz
}

' ---- Source and topics ----
file spider_log
queue "topic: collie-ic-data-to-mongo" #red
queue "topic: collie-ic-data-digest"
queue "topic: ic-data-clean-result"
queue "topic: ic_ods_binlog"
' Declared so it renders as a queue like the other topics rather than as an
' implicit component created by the links that reference it.
queue "topic: ic_update_data"

' ---- Flow: spider_log -> first topic ----
[spider_log] --> [topic: collie-ic-data-to-mongo]: logstash

' ---- Flow: digest registration ----
' label: single process
[topic: collie-ic-data-to-mongo] --> [udm: register_digest]: 单进程
' label: new digest is stored
[udm: register_digest] --> [company_base]: 新digest入库
[udm: register_digest] --> [topic: collie-ic-data-digest]

' ---- Flow: cleaning ----
' NOTE(review): this node is spelled "udms:" while every other such node in
' this diagram uses "udm:" -- confirm whether that is intentional.
' label: multi-process cleaning
[topic: collie-ic-data-digest] --> [udms: ic_spider_data]: 多进程清洗
[udms: ic_spider_data] --> [topic: ic-data-clean-result]

' ---- Flow: clean results -> ic_ods tables ----
' label: multi-process store
[topic: ic-data-clean-result] --> [udm: sync_mysql_new]: 多进程入库
[udm: sync_mysql_new] --> [company_base]
[udm: sync_mysql_new] --> [company_legalperson]
[udm: sync_mysql_new] --> [company_partner]
[udm: sync_mysql_new] --> [company_...]

' ---- Flow: ic_ods tables -> binlog topic ----
' label on each edge: one partition per table
[company_base] --> [topic: ic_ods_binlog]: 一表一分区
[company_legalperson] --> [topic: ic_ods_binlog]: 一表一分区
[company_partner] --> [topic: ic_ods_binlog]: 一表一分区
[company_...] --> [topic: ic_ods_binlog]: 一表一分区
' label: multi-process
[topic: ic_ods_binlog] --> [udm: ic_update_data]: 多进程
' label: digest + sub-dimension information
[udm: ic_update_data] --> [topic: ic_update_data]: digest + 子维度信息

' ---- Flow: update topic -> ic_mongo stores ----
[topic: ic_update_data] --> [ic]
[topic: ic_update_data] --> [partner]
[topic: ic_update_data] --> [employee]
[topic: ic_update_data] --> [其他分维度表]

' ---- Flow: update topic -> ic_mysql2B stores ----
[topic: ic_update_data] --> [ic_base]
[topic: ic_update_data] --> [ic_biz]
@enduml
```