InitExamineAddress.java
12.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
package com.taover.ai.scaffold;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.springframework.jdbc.core.JdbcTemplate;
import com.alibaba.druid.pool.DruidDataSource;
import com.aliyun.oss.OSSClient;
import com.aliyun.oss.model.OSSObject;
import com.taover.ai.bean.normal.ResultDetail;
import com.taover.ai.bean.normal.ZtoAnalysisResponseData;
import com.taover.ai.client.LocalClient;
import com.taover.ai.client.ZtoClient;
import com.taover.ai.util.UtilMath;
import com.taover.util.UtilExcel;
public class InitExamineAddress {
// Working files used by the stages of this class. All are absolute Windows paths.
// Cursor file read and written by dealDataFile(): index of the last data file processed.
public static String cursorPath = "D:\\workdata\\channel_upload_excel\\deal_cursor.txt";
// Directory that dealDataFile() scans for <index>.xls / <index>.xlsx files.
public static String dataDir = "D:\\workdata\\channel_upload_excel\\data";
// Text file of extracted addresses, one per line (appended by appendDetailAddress, read by readDetailAddress).
public static String detailAddressFile = "D:\\workdata\\channel_upload_excel\\detail_address.txt";
// Cursor file read and written by ztoAnalysis(): index of the last address line processed.
public static String ztoCursorPath = "D:\\workdata\\channel_upload_excel\\zto_cursor.txt";
// File to which appendError() appends exception stack traces.
public static String errorPath = "D:\\workdata\\channel_upload_excel\\error.txt";
// Shared JdbcTemplate; getJdbcTemplate() is invoked when this class is initialized.
public static JdbcTemplate template = getJdbcTemplate();
/**
 * Entry point. All three stages are currently commented out, so running the
 * class does nothing beyond class initialization; uncomment the stage to run.
 *
 * @param args unused
 */
public static void main(String[] args) {
// Stage 1: download the Excel files.
//downExcel();
// Stage 2: extract address cells from the downloaded files.
//dealDataFile();
// Stage 3: parse each address remotely and locally, then store the comparison.
//ztoAnalysis();
}
/**
 * Walks the stored address lines, starting one past the index saved in
 * {@code ztoCursorPath}. For each line of at least 10 characters it fetches the
 * ZTO and local parse results, stores the ZTO result, scores the two against
 * each other and stores the comparison. Any exception for a line is appended
 * to the error file and the loop continues. The cursor is rewritten after every
 * line that was attempted; lines that are null or too short are skipped without
 * touching the cursor.
 */
public static void ztoAnalysis() {
    // Load the address lines
    List<String> addresses = readDetailAddress();
    int firstIndex = Integer.parseInt(readCursor(ztoCursorPath)) + 1;
    for (int index = firstIndex; index < addresses.size(); index++) {
        String text = addresses.get(index);
        boolean usable = text != null && text.length() >= 10;
        if (usable) {
            try {
                // ZTO parse result, stored first
                ZtoAnalysisResponseData remote = ZtoClient.getAnalysisResult(text);
                insertZtoAnalysisRes(index, remote, text);
                // Local server parse result, scored against ZTO and stored
                ZtoAnalysisResponseData local = LocalClient.getAnalysisResult(text);
                insertCalcResult(index, local, calcScore(remote, local));
            } catch (Exception failure) {
                appendError(failure);
            }
            writeCursor(ztoCursorPath, String.valueOf(index));
        }
    }
}
/**
 * Stores the first ZTO parse result for an address in {@code examine_address}
 * (the {@code type} column is fixed to 1 by the statement).
 * Nothing is inserted when the response is null or carries no result entries;
 * a null response is tolerated here because calcScore() also accepts one.
 *
 * @param id         row id; the caller passes the address line index
 * @param response   ZTO parse response, may be null
 * @param originText the raw address text that was parsed
 */
private static void insertZtoAnalysisRes(int id, ZtoAnalysisResponseData response, String originText) {
    if (response == null) {
        return;
    }
    List<ResultDetail> resultList = response.getResult();
    if (resultList == null || resultList.isEmpty()) {
        return;
    }
    String insertSql = "insert into examine_address(id, origin_text, type, "
            + " province, city, district, detail_address) "
            + " value(?, ?, 1, "
            + " ?, ?, ?, ?)";
    ResultDetail item = resultList.get(0);
    template.update(insertSql, id, originText, item.getProvince(), item.getCity(), item.getDistrict(), item.getAddress());
}
/**
 * Scores how closely the local parse matches the ZTO parse.
 * Only the first result entry of each response is compared. A missing response,
 * missing result list, or null field is treated as the empty string, so null
 * values never reach {@code UtilMath.getSimilarityRatio}.
 *
 * @param ztoRes   ZTO parse response, may be null
 * @param localRes local parse response, may be null
 * @return four values {province, city, district, address}, each
 *         {@code (int) (100 * similarityRatio(local, zto))}
 */
private static int[] calcScore(ZtoAnalysisResponseData ztoRes, ZtoAnalysisResponseData localRes) {
    String[] zto = extractParts(ztoRes);
    String[] local = extractParts(localRes);
    int[] score = new int[zto.length];
    for (int k = 0; k < score.length; k++) {
        score[k] = (int) (100*UtilMath.getSimilarityRatio(local[k], zto[k]));
    }
    return score;
}

/** Returns {province, city, district, address} of the first result entry, with "" for anything missing. */
private static String[] extractParts(ZtoAnalysisResponseData res) {
    String[] parts = {"", "", "", ""};
    if (res == null || res.getResult() == null || res.getResult().isEmpty()) {
        return parts;
    }
    ResultDetail item = res.getResult().get(0);
    if (item != null) {
        parts[0] = emptyIfNull(item.getProvince());
        parts[1] = emptyIfNull(item.getCity());
        parts[2] = emptyIfNull(item.getDistrict());
        parts[3] = emptyIfNull(item.getAddress());
    }
    return parts;
}

/** Maps null to the empty string. */
private static String emptyIfNull(String value) {
    return value == null ? "" : value;
}
/**
 * Stores the first local parse result and its four similarity scores in
 * {@code examine_address_result} (the statement fixes {@code batch_no} to '12').
 * {@code id} is written to both the {@code id} and {@code address_id} columns.
 * Nothing is inserted when the response is null or carries no result entries;
 * a null response is tolerated here because calcScore() also accepts one.
 *
 * @param id       row id and address id; the caller passes the address line index
 * @param response local parse response, may be null
 * @param score    {province, city, district, address} scores as produced by calcScore()
 */
private static void insertCalcResult(int id, ZtoAnalysisResponseData response, int[] score) {
    if (response == null) {
        return;
    }
    List<ResultDetail> resultList = response.getResult();
    if (resultList == null || resultList.isEmpty()) {
        return;
    }
    String insertSql = "insert into examine_address_result(id, address_id, batch_no, "
            + " province, city, district, detail_address, score_province,"
            + " score_city, score_district, score_address) "
            + " value(?, ?, '12', "
            + " ?, ?, ?, ?, ?,"
            + " ?, ?, ?)";
    ResultDetail item = resultList.get(0);
    template.update(insertSql, id, id, item.getProvince(), item.getCity(), item.getDistrict(), item.getAddress(), score[0], score[1], score[2], score[3]);
}
/**
 * Builds a JdbcTemplate on a freshly configured Druid connection pool.
 * NOTE(review): the JDBC URL, user name and password are hard-coded below.
 *
 * @return a JdbcTemplate wrapping the new pool
 */
private static JdbcTemplate getJdbcTemplate() {
    DruidDataSource pool = new DruidDataSource();

    // Driver and connection target
    pool.setDriverClassName("com.mysql.jdbc.Driver");
    pool.setUrl("jdbc:mysql://192.168.3.189:3306/8zyun_ai?characterEncoding=utf-8");
    pool.setUsername("root");
    pool.setPassword("root");

    // Pool sizing: one connection up front, idle bounds of 1 and 20
    pool.setInitialSize(1);
    pool.setMinIdle(1);
    pool.setMaxIdle(20);

    // Prepared-statement pooling is switched on
    pool.setPoolPreparedStatements(true);

    // Eviction run interval: 60 seconds
    pool.setTimeBetweenEvictionRunsMillis(60 * 1000);

    // Validation query; idle-time validation itself is switched off
    pool.setValidationQuery("SELECT 1 FROM DUAL");
    pool.setTestWhileIdle(false);

    return new JdbcTemplate(pool);
}
/**
 * Reads every line of {@code detailAddressFile} into a list.
 * The reader is closed even when reading fails part-way. Failures are printed
 * and whatever was read so far (possibly nothing) is returned.
 *
 * @return the lines read, in file order; never null
 */
private static List<String> readDetailAddress(){
    List<String> result = new ArrayList<String>();
    try {
        BufferedReader br = new BufferedReader(new FileReader(new File(detailAddressFile)));
        try {
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                result.add(line);
            }
        } finally {
            // Closing the BufferedReader also closes the wrapped FileReader.
            br.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}
/**
 * Extracts address cells from the numbered Excel files in {@code dataDir} and
 * appends them to the detail-address file. Processing resumes one past the
 * index stored in {@code cursorPath} and stops before index 4340. The cursor
 * is rewritten after each file that succeeds; a failing file is reported via
 * printStackTrace and the loop moves on. Does nothing when {@code dataDir}
 * is not a directory.
 */
public static void dealDataFile() {
    File folder = new File(InitExamineAddress.dataDir);
    if (!folder.isDirectory()) {
        return;
    }
    // "[a][b][c]" form lets getDataFileName test for an exact file name with contains()
    String[] childNames = folder.list();
    String bracketedNames = "[" + StringUtils.join(childNames, "][") + "]";
    int firstIndex = Integer.parseInt(readCursor(cursorPath)) + 1;
    for (int fileIndex = firstIndex; fileIndex < 4340; fileIndex++) {
        try {
            String excelName = getDataFileName(bracketedNames, fileIndex);
            List<List<Object>> rows = UtilExcel.readExcel(folder+"\\"+excelName);
            appendDetailAddress(getDetailAddressList(rows));
            writeCursor(cursorPath, String.valueOf(fileIndex));
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
}
/**
 * Appends each address followed by "\n" to {@code detailAddressFile}, creating
 * the file first if it does not exist. The writer is closed even when a write
 * fails.
 *
 * @param detailAddressList addresses to append, in order
 * @throws Exception if the file cannot be created, opened or written
 */
private static void appendDetailAddress(List<String> detailAddressList) throws Exception{
    File detailAddress = new File(detailAddressFile);
    if(!detailAddress.exists()) {
        detailAddress.createNewFile();
    }
    FileWriter fw = new FileWriter(detailAddress, true);
    try {
        for (String address : detailAddressList) {
            fw.append(address+"\n");
        }
    } finally {
        fw.close();
    }
}
/**
 * Appends the stack trace of {@code e} to the file at {@code errorPath},
 * creating the file first if it does not exist. The writer is closed even when
 * the append fails. Any failure while logging is itself only printed, so this
 * method never throws.
 *
 * @param e the exception to record
 */
private static void appendError(Exception e) {
    try {
        File errorFile = new File(errorPath);
        if(!errorFile.exists()) {
            errorFile.createNewFile();
        }
        // Render the trace in memory first so the file is open only for the write itself.
        StringWriter trace = new StringWriter();
        e.printStackTrace(new PrintWriter(trace));
        FileWriter fw = new FileWriter(errorFile, true);
        try {
            fw.append(trace.toString());
        } finally {
            fw.close();
        }
    }catch(Exception e1) {
        e1.printStackTrace();
    }
}
/**
 * Collects address cell values from sheet rows.
 * Row 0 is treated as the header and passed to getAddressIndex() to find the
 * address columns. Rows 1 up to (but not including) row 10000 are then scanned,
 * and every non-null cell in an address column whose string form is longer than
 * 10 characters is collected.
 *
 * @param addressList sheet rows, each a list of cell values; may be null or empty
 * @return the collected cell strings in row order, then column order; never null
 */
private static List<String> getDetailAddressList(List<List<Object>> addressList){
    List<String> collected = new ArrayList<String>();
    if (addressList == null || addressList.isEmpty()) {
        return collected;
    }
    List<Integer> addressColumns = getAddressIndex(addressList.get(0));
    int rowLimit = Math.min(addressList.size(), 10000);
    for (int row = 1; row < rowLimit; row++) {
        List<Object> cells = addressList.get(row);
        for (Integer column : addressColumns) {
            // Short rows may not reach every address column
            if (column >= cells.size()) {
                continue;
            }
            Object cell = cells.get(column);
            if (cell == null) {
                continue;
            }
            String text = cell.toString();
            if (text.length() > 10) {
                collected.add(text);
            }
        }
    }
    return collected;
}
/**
 * Finds the header columns whose text contains "地址".
 * Only the first 24 columns are inspected; null headers are ignored.
 *
 * @param data header row cells; may be null or empty
 * @return zero-based indexes of matching columns in ascending order; never null
 */
private static List<Integer> getAddressIndex(List<Object> data){
    List<Integer> positions = new ArrayList<Integer>();
    if (data == null) {
        return positions;
    }
    int limit = Math.min(data.size(), 24);
    for (int column = 0; column < limit; column++) {
        Object header = data.get(column);
        if (header != null && header.toString().contains("地址")) {
            positions.add(column);
        }
    }
    return positions;
}
/**
 * Resolves the Excel file name for a cursor value.
 * Candidates are tried in the order {@code .xls}, {@code .xlsx}, {@code .XLS},
 * {@code .XLSX}; the first one that appears in {@code pathStr} wrapped in
 * square brackets wins.
 *
 * @param pathStr directory listing encoded as "[name1][name2]..."
 * @param cursor  numeric file index
 * @return the matching file name, e.g. "12.xlsx"
 * @throws Exception if none of the four candidates is present
 */
private static String getDataFileName(String pathStr, int cursor) throws Exception {
    String[] extensions = {".xls", ".xlsx", ".XLS", ".XLSX"};
    for (String extension : extensions) {
        String candidate = cursor + extension;
        if (pathStr.contains("[" + candidate + "]")) {
            return candidate;
        }
    }
    throw new Exception("没有找到"+cursor+"对应的文件");
}
/**
 * Reads the saved cursor from the first line of a file.
 * The file is created if it does not exist. Surrounding whitespace is trimmed
 * so that a hand-edited cursor still parses as an integer in the callers.
 * The reader is closed even when reading fails.
 *
 * @param cursorPath path of the cursor file
 * @return the trimmed first line, or "-1" when the file is empty, the first
 *         line is blank, or any I/O error occurs (the error is printed)
 */
private static String readCursor(String cursorPath) {
    File dealCursor = new File(cursorPath);
    try {
        if(!dealCursor.exists()) {
            dealCursor.createNewFile();
        }
        BufferedReader reader = new BufferedReader(new FileReader(dealCursor));
        String firstLine;
        try {
            firstLine = reader.readLine();
        } finally {
            // Closing the BufferedReader also closes the wrapped FileReader.
            reader.close();
        }
        if (firstLine != null) {
            String trimmed = firstLine.trim();
            if (trimmed.length() > 0) {
                return trimmed;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return "-1";
}
/**
 * Overwrites the cursor file with the given value, creating the file first if
 * it does not exist. The writer is closed even when the write fails. I/O
 * errors are printed and otherwise ignored.
 *
 * @param cursorPath path of the cursor file
 * @param cursor     value to store
 */
private static void writeCursor(String cursorPath, String cursor) {
    File dealCursor = new File(cursorPath);
    try {
        if(!dealCursor.exists()) {
            dealCursor.createNewFile();
        }
        FileWriter fw = new FileWriter(dealCursor);
        try {
            fw.write(cursor);
        } finally {
            fw.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Downloads every listed Excel object into {@code dataDir}, naming each file
 * {@code <list index><extension>} so that dealDataFile() can find it by index.
 * Blank list entries are skipped, but their index is still consumed so the
 * numbering stays aligned with the URL list. The extension is taken from the
 * last path segment only; an entry without one is saved with no extension
 * instead of aborting the whole run.
 */
public static void downExcel() {
    // Load the list of object keys / URLs
    List<String> urlList = getUrlListFromFile();
    for(int i=0; i<urlList.size(); ++i) {
        String urlItem = urlList.get(i) == null ? "" : urlList.get(i).trim();
        if (urlItem.length() == 0) {
            continue;
        }
        // A dot counts as an extension separator only if it comes after the last '/'.
        int dot = urlItem.lastIndexOf('.');
        String extension = dot > urlItem.lastIndexOf('/') ? urlItem.substring(dot) : "";
        downExcelToLocal(urlItem, dataDir+"\\"+i+extension);
    }
}
/**
 * Copies one object from the "ty-image-01" bucket to a local file.
 * Both the source stream and the output file are closed even when the copy
 * fails. Any exception is printed and otherwise ignored, so one bad object
 * does not stop the caller's loop.
 *
 * @param fileUrl           object key passed to getObject()
 * @param destLocalFilePath local path to write
 */
private static void downExcelToLocal(String fileUrl, String destLocalFilePath) {
    try {
        BufferedInputStream bis = new BufferedInputStream(getObject("ty-image-01", fileUrl));
        try {
            byte[] bufferByte = new byte[1024];
            // First read happens before the output file is opened, so a source
            // that fails immediately does not leave an empty local file behind.
            int readLen = bis.read(bufferByte);
            FileOutputStream fileOutput = new FileOutputStream(destLocalFilePath);
            try {
                while(readLen != -1) {
                    fileOutput.write(bufferByte, 0, readLen);
                    readLen = bis.read(bufferByte);
                }
            } finally {
                fileOutput.close();
            }
        } finally {
            // Closing the BufferedInputStream also closes the wrapped object stream.
            bis.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
// Shared OSS client used by getObject(); constructed when this class is initialized.
// NOTE(review): the endpoint and both credential strings are hard-coded in source —
// they should be rotated and loaded from configuration instead.
private static OSSClient client = new OSSClient("oss-cn-qingdao.aliyuncs.com", "H4fIVB56iHjR6zQw", "7bA395UltFp16kWPJT7Pfz0XYXCk4Q");
/**
 * Opens the content stream of an object through the shared OSS client.
 * The caller is responsible for closing the returned stream.
 *
 * @param bucketName bucket to read from
 * @param key        object key
 * @return the object's content stream
 * @throws IOException declared for callers; nothing in this body throws it directly
 */
private static InputStream getObject(String bucketName, String key) throws IOException {
    return client.getObject(bucketName, key).getObjectContent();
}
/**
 * Reads the list of Excel object keys / URLs, one per line, from
 * {@code address_file_url.txt}. The reader is always closed (the original never
 * closed it). Failures are printed and whatever was read so far is returned.
 *
 * @return the lines read, in file order; never null
 */
private static List<String> getUrlListFromFile(){
    List<String> urlList = new ArrayList<String>();
    try {
        File urlFile = new File("D:\\workdata\\channel_upload_excel\\address_file_url.txt");
        BufferedReader bufferReader = new BufferedReader(new FileReader(urlFile));
        try {
            for (String line = bufferReader.readLine(); line != null; line = bufferReader.readLine()) {
                urlList.add(line);
            }
        } finally {
            // Closing the BufferedReader also closes the wrapped FileReader.
            bufferReader.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return urlList;
}
}