// InitExamineAddress.java 12.5 KB
package com.taover.ai.scaffold;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.springframework.jdbc.core.JdbcTemplate;

import com.alibaba.druid.pool.DruidDataSource;
import com.aliyun.oss.OSSClient;
import com.aliyun.oss.model.OSSObject;
import com.taover.ai.bean.normal.ResultDetail;
import com.taover.ai.bean.normal.ZtoAnalysisResponseData;
import com.taover.ai.client.LocalClient;
import com.taover.ai.client.ZtoClient;
import com.taover.ai.util.UtilMath;
import com.taover.util.UtilExcel;

/**
 * Scaffold for building an address-parsing evaluation set. It has three
 * independent, resumable steps that are toggled by hand in {@link #main}:
 * <ol>
 *   <li>{@link #downExcel()} downloads the Excel files listed in a URL file from OSS
 *       into {@link #dataDir}, naming them by their line index;</li>
 *   <li>{@link #dealDataFile()} extracts the address columns of those Excel files
 *       and appends them to {@link #detailAddressFile};</li>
 *   <li>{@link #ztoAnalysis()} parses every address with the ZTO client and the
 *       local client, scores the similarity and stores both in the database.</li>
 * </ol>
 * Steps 2 and 3 persist the index of the last handled item in a cursor file so an
 * interrupted run can continue where it stopped.
 * <p>
 * All text files are read and written with the platform default charset
 * ({@code FileReader}/{@code FileWriter}); the files produced here must therefore be
 * consumed on a machine with the same default charset.
 */
public class InitExamineAddress {
	public static String cursorPath = "D:\\workdata\\channel_upload_excel\\deal_cursor.txt";
	public static String dataDir = "D:\\workdata\\channel_upload_excel\\data";
	public static String detailAddressFile = "D:\\workdata\\channel_upload_excel\\detail_address.txt";
	public static String ztoCursorPath = "D:\\workdata\\channel_upload_excel\\zto_cursor.txt";
	public static String errorPath = "D:\\workdata\\channel_upload_excel\\error.txt";
	public static JdbcTemplate template = getJdbcTemplate();

	// NOTE(review): the OSS access key pair is hard-coded in source. It should be
	// rotated and loaded from configuration or the environment instead.
	private static OSSClient client = new OSSClient("oss-cn-qingdao.aliyuncs.com", "H4fIVB56iHjR6zQw", "7bA395UltFp16kWPJT7Pfz0XYXCk4Q");

	/** Exclusive upper bound of the numbered Excel files handled by {@link #dealDataFile()}. */
	private static final int DATA_FILE_END_INDEX = 4340;
	/** Addresses shorter than this are not sent to the parsers. */
	private static final int MIN_ANALYSIS_ADDRESS_LENGTH = 10;
	/** Cell texts must be longer than this to be collected as an address. */
	private static final int MIN_COLLECTED_ADDRESS_LENGTH = 10;
	/** Rows at or beyond this index of a sheet are ignored. */
	private static final int MAX_SHEET_ROWS = 10000;
	/** Only the first columns of the header row are searched for address columns. */
	private static final int MAX_HEADER_COLUMNS = 24;
	/** File extensions tried, in this order, when locating a numbered Excel file. */
	private static final String[] EXCEL_EXTENSIONS = {".xls", ".xlsx", ".XLS", ".XLSX"};

	/**
	 * Entry point. The three steps are run one at a time by un-commenting the
	 * corresponding call.
	 */
	public static void main(String[] args) {
		// Step 1: download the Excel files.
		//downExcel();

		// Step 2: extract the address columns into the detail-address file.
		//dealDataFile();

		// Step 3: parse, score and store every address.
		//ztoAnalysis();
	}

	/**
	 * Parses every not-yet-handled address with the ZTO client and the local client,
	 * stores the ZTO result and the scored local result, and advances the ZTO cursor.
	 * <p>
	 * A failure for one address is appended to the error file and the cursor is still
	 * advanced, so a failing address is not retried on the next run. Addresses that
	 * are {@code null} or shorter than 10 characters are skipped without touching the
	 * cursor.
	 */
	public static void ztoAnalysis() {
		// Load the address lines.
		List<String> detailList = readDetailAddress();
		int startCursor = nextIndexAfterCursor(ztoCursorPath);
		for (int i = startCursor; i < detailList.size(); ++i) {
			String originAddress = detailList.get(i);
			if (originAddress == null || originAddress.length() < MIN_ANALYSIS_ADDRESS_LENGTH) {
				continue;
			}

			try {
				// Fetch and store the ZTO parsing result.
				ZtoAnalysisResponseData ztoResponse = ZtoClient.getAnalysisResult(originAddress);
				insertZtoAnalysisRes(i, ztoResponse, originAddress);

				// Fetch the local server's parsing result.
				ZtoAnalysisResponseData localResponse = LocalClient.getAnalysisResult(originAddress);

				// Score the similarity and store the comparison.
				int[] score = calcScore(ztoResponse, localResponse);
				insertCalcResult(i, localResponse, score);
			} catch (Exception e) {
				appendError("address index " + i, e);
			}
			writeCursor(ztoCursorPath, String.valueOf(i));
		}
	}

	/**
	 * Stores the first ZTO result for an address in {@code examine_address}
	 * (with {@code type} 1). Nothing is stored when the response is {@code null}
	 * or carries no result.
	 *
	 * @param id         row id; the index of the address in the detail-address file
	 * @param response   ZTO parsing response, may be {@code null}
	 * @param originText the address text that was parsed
	 */
	private static void insertZtoAnalysisRes(int id, ZtoAnalysisResponseData response, String originText) {
		ResultDetail item = firstResult(response);
		if (item == null) {
			return;
		}
		String insertSql = "insert into examine_address(id, origin_text, type, "
				+ " province, city, district, detail_address) "
				+ " value(?, ?, 1, "
				+ " ?, ?, ?, ?)";
		template.update(insertSql, id, originText, item.getProvince(), item.getCity(), item.getDistrict(), item.getAddress());
	}

	/**
	 * Scores how similar the local result is to the ZTO result, per component.
	 * A missing response, missing result or {@code null} component is compared as
	 * the empty string.
	 *
	 * @return four percentages (similarity ratio times 100, truncated) in the order
	 *         province, city, district, address
	 */
	private static int[] calcScore(ZtoAnalysisResponseData ztoRes, ZtoAnalysisResponseData localRes) {
		ResultDetail zto = firstResult(ztoRes);
		ResultDetail local = firstResult(localRes);

		String ztoProvince = zto == null ? "" : nullToEmpty(zto.getProvince());
		String ztoCity = zto == null ? "" : nullToEmpty(zto.getCity());
		String ztoDistrict = zto == null ? "" : nullToEmpty(zto.getDistrict());
		String ztoAddress = zto == null ? "" : nullToEmpty(zto.getAddress());

		String localProvince = local == null ? "" : nullToEmpty(local.getProvince());
		String localCity = local == null ? "" : nullToEmpty(local.getCity());
		String localDistrict = local == null ? "" : nullToEmpty(local.getDistrict());
		String localAddress = local == null ? "" : nullToEmpty(local.getAddress());

		return new int[] {
				(int) (100 * UtilMath.getSimilarityRatio(localProvince, ztoProvince)),
				(int) (100 * UtilMath.getSimilarityRatio(localCity, ztoCity)),
				(int) (100 * UtilMath.getSimilarityRatio(localDistrict, ztoDistrict)),
				(int) (100 * UtilMath.getSimilarityRatio(localAddress, ztoAddress))};
	}

	/**
	 * Stores the first local result together with its scores in
	 * {@code examine_address_result}. The same value is used for {@code id} and
	 * {@code address_id}; the batch number is fixed in the statement. Nothing is
	 * stored when the response is {@code null} or carries no result.
	 *
	 * @param id       index of the address in the detail-address file
	 * @param response local parsing response, may be {@code null}
	 * @param score    scores as returned by {@link #calcScore}
	 */
	private static void insertCalcResult(int id, ZtoAnalysisResponseData response, int[] score) {
		ResultDetail item = firstResult(response);
		if (item == null) {
			return;
		}
		String insertSql = "insert into examine_address_result(id, address_id, batch_no, "
				+ " province, city, district, detail_address, score_province,"
				+ " score_city, score_district, score_address) "
				+ " value(?, ?, '12', "
				+ " ?, ?, ?, ?, ?,"
				+ " ?, ?, ?)";
		template.update(insertSql, id, id, item.getProvince(), item.getCity(), item.getDistrict(), item.getAddress(), score[0], score[1], score[2], score[3]);
	}

	/** Returns the first result of a response, or {@code null} when there is none. */
	private static ResultDetail firstResult(ZtoAnalysisResponseData response) {
		if (response == null) {
			return null;
		}
		List<ResultDetail> resultList = response.getResult();
		if (resultList == null || resultList.size() == 0) {
			return null;
		}
		return resultList.get(0);
	}

	/** Maps {@code null} to the empty string and returns any other value unchanged. */
	private static String nullToEmpty(String value) {
		return value == null ? "" : value;
	}

	/**
	 * Builds the {@link JdbcTemplate} used for all inserts, backed by a Druid pool.
	 */
	private static JdbcTemplate getJdbcTemplate() {
		DruidDataSource dataSource = new DruidDataSource();
		// JDBC driver class.
		dataSource.setDriverClassName("com.mysql.jdbc.Driver");
		// Connection URL, user name and password.
		// NOTE(review): host and root/root credentials are hard-coded; move them to configuration.
		dataSource.setUrl("jdbc:mysql://192.168.3.189:3306/8zyun_ai?characterEncoding=utf-8");
		dataSource.setUsername("root");
		dataSource.setPassword("root");
		// Number of connections opened at start-up.
		dataSource.setInitialSize(1);
		// Lower and upper bound for idle connections.
		dataSource.setMinIdle(1);
		dataSource.setMaxIdle(20);
		// Pool prepared statements.
		dataSource.setPoolPreparedStatements(true);
		// Interval between eviction runs: 60 seconds.
		dataSource.setTimeBetweenEvictionRunsMillis(60 * 1000);
		// SQL used to validate a connection.
		dataSource.setValidationQuery("SELECT 1 FROM DUAL");
		// Validation of idle connections is switched off.
		// NOTE(review): the original comment described validating idle connections on
		// borrow, which would need this flag to be true - confirm the intended setting.
		dataSource.setTestWhileIdle(false);
		return new JdbcTemplate(dataSource);
	}

	/**
	 * Reads all lines of {@link #detailAddressFile}.
	 *
	 * @return the lines read; on an I/O error the stack trace is printed and the
	 *         lines read so far (possibly none) are returned
	 */
	private static List<String> readDetailAddress() {
		return readLines(detailAddressFile);
	}

	/**
	 * Extracts the addresses of every numbered Excel file after the cursor and
	 * appends them to the detail-address file. Does nothing when {@link #dataDir}
	 * is not a directory.
	 * <p>
	 * A file that is missing or fails to parse is reported on standard error and
	 * skipped; the cursor is only written after a file was handled successfully.
	 */
	public static void dealDataFile() {
		File dataDirectory = new File(InitExamineAddress.dataDir);
		if (!dataDirectory.isDirectory()) {
			return;
		}
		// "[a][b][c]" form so that a complete file name can be found with contains().
		String[] sonPathList = dataDirectory.list();
		String pathStr = "[" + StringUtils.join(sonPathList, "][") + "]";
		int startCursor = nextIndexAfterCursor(cursorPath);
		for (int i = startCursor; i < DATA_FILE_END_INDEX; ++i) {
			try {
				String dataFileName = getDataFileName(pathStr, i);
				List<List<Object>> sonData = UtilExcel.readExcel(new File(dataDirectory, dataFileName).getPath());
				List<String> detailAddressData = getDetailAddressList(sonData);
				appendDetailAddress(detailAddressData);
				writeCursor(cursorPath, String.valueOf(i));
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
	}

	/**
	 * Appends the given addresses, one per line, to {@link #detailAddressFile},
	 * creating the file when it does not exist.
	 *
	 * @throws Exception when the file cannot be created or written
	 */
	private static void appendDetailAddress(List<String> detailAddressList) throws Exception {
		File detailAddress = new File(detailAddressFile);
		if (!detailAddress.exists()) {
			detailAddress.createNewFile();
		}
		FileWriter writer = null;
		try {
			writer = new FileWriter(detailAddress, true);
			for (int i = 0; i < detailAddressList.size(); ++i) {
				writer.append(detailAddressList.get(i) + "\n");
			}
			// Flush explicitly so a write failure reaches the caller before the cursor moves on.
			writer.flush();
		} finally {
			closeResource(writer);
		}
	}

	/**
	 * Appends a context line followed by the stack trace of {@code e} to
	 * {@link #errorPath}. Failures while writing are only printed, never thrown.
	 *
	 * @param context short description of what was being handled, or {@code null}
	 * @param e       the failure to record
	 */
	private static void appendError(String context, Exception e) {
		FileWriter writer = null;
		try {
			File errorFile = new File(errorPath);
			if (!errorFile.exists()) {
				errorFile.createNewFile();
			}
			writer = new FileWriter(errorFile, true);
			if (context != null) {
				writer.append("[" + context + "]" + System.getProperty("line.separator"));
			}
			StringWriter trace = new StringWriter();
			e.printStackTrace(new PrintWriter(trace));
			writer.append(trace.toString());
			writer.flush();
		} catch (Exception e1) {
			e1.printStackTrace();
		} finally {
			closeResource(writer);
		}
	}

	/**
	 * Collects the address cells of a sheet. The first row is taken as the header
	 * that determines the address columns; rows from index 10000 on are ignored.
	 *
	 * @param addressList sheet content as rows of cells, may be {@code null}
	 * @return the text of every address cell longer than 10 characters
	 */
	private static List<String> getDetailAddressList(List<List<Object>> addressList) {
		List<String> result = new ArrayList<String>();
		if (addressList == null || addressList.size() == 0) {
			return result;
		}
		List<Integer> addressIndexList = getAddressIndex(addressList.get(0));
		for (int row = 1; row < addressList.size() && row < MAX_SHEET_ROWS; ++row) {
			List<Object> addressItem = addressList.get(row);
			if (addressItem == null) {
				continue;
			}
			for (int j = 0; j < addressIndexList.size(); ++j) {
				int column = addressIndexList.get(j);
				if (column >= addressItem.size()) {
					continue;
				}
				Object detailItem = addressItem.get(column);
				if (detailItem != null && detailItem.toString().length() > MIN_COLLECTED_ADDRESS_LENGTH) {
					result.add(detailItem.toString());
				}
			}
		}
		return result;
	}

	/**
	 * Finds the address columns of a header row: the indexes, among the first 24
	 * cells, whose text contains "地址".
	 *
	 * @param data header row, may be {@code null}
	 */
	private static List<Integer> getAddressIndex(List<Object> data) {
		List<Integer> result = new ArrayList<Integer>();
		if (data == null || data.size() == 0) {
			return result;
		}
		for (int i = 0; i < data.size() && i < MAX_HEADER_COLUMNS; ++i) {
			Object item = data.get(i);
			if (item != null && item.toString().contains("地址")) {
				result.add(i);
			}
		}
		return result;
	}

	/**
	 * Resolves the file name for a numbered Excel file by trying the known
	 * extensions in order.
	 *
	 * @param pathStr directory listing in the form {@code [name1][name2]...}
	 * @param cursor  number of the file
	 * @return the first {@code cursor + extension} present in the listing
	 * @throws Exception when no such file is listed
	 */
	private static String getDataFileName(String pathStr, int cursor) throws Exception {
		for (int i = 0; i < EXCEL_EXTENSIONS.length; ++i) {
			String fileName = cursor + EXCEL_EXTENSIONS[i];
			if (pathStr.contains("[" + fileName + "]")) {
				return fileName;
			}
		}
		throw new Exception("没有找到"+cursor+"对应的文件");
	}

	/**
	 * Returns the index at which processing should resume: the stored cursor plus
	 * one, never less than 0.
	 *
	 * @throws NumberFormatException when the cursor file does not hold an integer
	 */
	private static int nextIndexAfterCursor(String path) {
		return Math.max(0, Integer.parseInt(readCursor(path)) + 1);
	}

	/**
	 * Reads the first line of a cursor file, creating the file when it is missing.
	 *
	 * @return the trimmed first line, or {@code "-1"} when the file is empty, blank
	 *         or cannot be read
	 */
	private static String readCursor(String cursorPath) {
		BufferedReader reader = null;
		try {
			File dealCursor = new File(cursorPath);
			if (!dealCursor.exists()) {
				dealCursor.createNewFile();
			}
			reader = new BufferedReader(new FileReader(dealCursor));
			String line = reader.readLine();
			if (line != null && line.trim().length() > 0) {
				// Trim so that stray whitespace or a trailing "\r" does not break parsing.
				return line.trim();
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeResource(reader);
		}
		return "-1";
	}

	/**
	 * Overwrites a cursor file with the given value, creating the file when it is
	 * missing. Failures are only printed.
	 */
	private static void writeCursor(String cursorPath, String cursor) {
		FileWriter writer = null;
		try {
			File dealCursor = new File(cursorPath);
			if (!dealCursor.exists()) {
				dealCursor.createNewFile();
			}
			writer = new FileWriter(dealCursor);
			writer.write(cursor);
			writer.flush();
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeResource(writer);
		}
	}

	/**
	 * Downloads every object listed in the URL file into {@link #dataDir}. The
	 * local file is named after the line index of the URL plus the URL's extension.
	 * Blank lines are skipped but still count towards the index.
	 */
	public static void downExcel() {
		// Load the list of Excel object keys.
		List<String> urlList = getUrlListFromFile();

		// Download them one by one.
		for (int i = 0; i < urlList.size(); ++i) {
			String urlItem = urlList.get(i);
			if (urlItem == null || urlItem.trim().length() == 0) {
				continue;
			}
			downExcelToLocal(urlItem, dataDir + "\\" + i + extensionOf(urlItem));
		}
	}

	/**
	 * Returns the extension of a URL including the leading dot, or the empty string
	 * when the last path segment has no dot.
	 */
	private static String extensionOf(String url) {
		int dotIndex = url.lastIndexOf('.');
		if (dotIndex < 0 || dotIndex < url.lastIndexOf('/')) {
			return "";
		}
		return url.substring(dotIndex);
	}

	/**
	 * Copies one object of bucket {@code ty-image-01} to a local file. Failures
	 * are only printed; both streams are closed in every case.
	 *
	 * @param fileUrl           object key inside the bucket
	 * @param destLocalFilePath path of the local file to write
	 */
	private static void downExcelToLocal(String fileUrl, String destLocalFilePath) {
		InputStream source = null;
		FileOutputStream target = null;
		try {
			source = new BufferedInputStream(getObject("ty-image-01", fileUrl));
			byte[] bufferByte = new byte[1024];
			// Read once before opening the target so that no file is created when
			// the object cannot be read at all.
			int readLen = source.read(bufferByte);
			target = new FileOutputStream(destLocalFilePath);
			while (readLen != -1) {
				target.write(bufferByte, 0, readLen);
				readLen = source.read(bufferByte);
			}
			target.flush();
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeResource(target);
			closeResource(source);
		}
	}

	/**
	 * Opens the content stream of an OSS object. The caller must close the stream.
	 */
	private static InputStream getObject(String bucketName, String key) throws IOException {
		// Fetch the object; the result is an OSSObject.
		OSSObject object = client.getObject(bucketName, key);
		// Hand out the object's input stream.
		return object.getObjectContent();
	}

	/**
	 * Reads the list of object keys to download, one per line.
	 *
	 * @return the lines read; on an I/O error the stack trace is printed and the
	 *         lines read so far (possibly none) are returned
	 */
	private static List<String> getUrlListFromFile() {
		return readLines("D:\\workdata\\channel_upload_excel\\address_file_url.txt");
	}

	/**
	 * Reads a text file line by line with the platform default charset.
	 *
	 * @return the lines read; on an I/O error the stack trace is printed and the
	 *         lines read so far (possibly none) are returned
	 */
	private static List<String> readLines(String path) {
		List<String> lines = new ArrayList<String>();
		BufferedReader reader = null;
		try {
			reader = new BufferedReader(new FileReader(new File(path)));
			String line = reader.readLine();
			while (line != null) {
				lines.add(line);
				line = reader.readLine();
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeResource(reader);
		}
		return lines;
	}

	/**
	 * Closes a resource if it was opened. A failure to close is printed and not
	 * propagated so that it cannot mask an earlier exception.
	 */
	private static void closeResource(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}