// Similarity.java 6.51 KB
package com.gimi.common.cinema.utils;

import android.util.Log;

import java.text.NumberFormat;
import java.util.Locale;

/**
 * Measures how similar two strings are.
 *
 * <p>Both inputs are first reduced to their "comparable" characters (characters
 * in the range U+4E00..U+9FA5, ASCII letters and ASCII digits). The score is
 * the length of the longest common subsequence of the two reduced strings
 * divided by the length of the longer reduced string, so it lies in
 * {@code [0.0, 1.0]}.
 */
public class Similarity {
    /** Tag used for every debug log line emitted by this class. */
    private static final String TAG = "lovely";

    /** Minimum score at which {@link #isSimilar(String, String)} reports a match. */
    private static final double SIMILAR_THRESHOLD = 0.951;

    /** Score below which {@link #notSimilar(String, String)} reports a mismatch. */
    private static final double NOT_SIMILAR_THRESHOLD = 0.9;

    /**
     * Small manual demo: scores two sample sentences and prints the verdict,
     * the score as a percentage and the raw score.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String strA = "我是中国人";
        String strB = "你是中国人";
        double result = SimilarDegree(strA, strB);
        String verdict = result >= 0.7 ? "相似度很高!" : "相似度不高";
        System.out.println(verdict + similarityResult(result) + result);
        System.out.println();
    }

    /**
     * Tells whether two strings are close enough to be treated as the same.
     *
     * @param a first string, must not be {@code null}
     * @param b second string, must not be {@code null}
     * @return {@code true} when the similarity score is at least 0.951
     * @throws NullPointerException if either argument is {@code null}
     */
    public static boolean isSimilar(String a, String b) {
        // The score is symmetric in its arguments, so no reordering by length is needed.
        return SimilarDegree(a, b) >= SIMILAR_THRESHOLD;
    }

    /**
     * Tells whether two strings are clearly different, and logs the compared
     * pair together with its score at debug level.
     *
     * @param a first string, must not be {@code null}
     * @param b second string, must not be {@code null}
     * @return {@code true} when the similarity score is below 0.9
     * @throws NullPointerException if either argument is {@code null}
     */
    public static boolean notSimilar(String a, String b) {
        // Put the longer string first; this only affects the order in the log output.
        if (a.length() < b.length()) {
            String shorter = a;
            a = b;
            b = shorter;
            Log.d(TAG, "exchange");
        }
        double v = SimilarDegree(a, b);
        Log.d(TAG, "╔════════════════════════════════════════════");
        Log.d(TAG, "╟  a,b" + a + "\n" + b);
        Log.d(TAG, "╟  Similarity is:" + v);
        Log.d(TAG, "╚════════════════════════════════════════════");
        return v < NOT_SIMILAR_THRESHOLD;
    }

    /**
     * Extracts a bare name from a '/'-separated path.
     *
     * <p>A value without any '/' is returned unchanged. Otherwise the text
     * after the last '/' is taken and cut at its first '.'.
     *
     * @param name path or plain name, must not be {@code null}
     * @return the extracted name
     */
    private static String getName(String name) {
        if (!name.contains("/")) {
            return name;
        }
        String lastSegment = name.substring(name.lastIndexOf("/") + 1);
        int dot = lastSegment.indexOf(".");
        // Without a dot there is nothing to strip; substring(0, -1) would throw.
        return dot < 0 ? lastSegment : lastSegment.substring(0, dot);
    }

    /**
     * Formats a similarity score as a percentage, e.g. {@code 0.8} becomes {@code "80%"}.
     *
     * @param resule score to format
     * @return the score rendered with the US percent format
     */
    public static String similarityResult(double resule) {
        // Locale.US instead of new Locale("en ", "US "): the trailing spaces made the locale malformed.
        return NumberFormat.getPercentInstance(Locale.US).format(resule);
    }

    /**
     * Computes the similarity score of two strings.
     *
     * <p>Characters that are not comparable (see {@link #charReg(char)}) are
     * dropped from both inputs before scoring.
     *
     * @param strA first string, must not be {@code null}
     * @param strB second string, must not be {@code null}
     * @return longest-common-subsequence length divided by the longer reduced
     *         length; {@code 1.0} when neither input contains a comparable character
     * @throws NullPointerException if either argument is {@code null}
     */
    public static double SimilarDegree(String strA, String strB) {
        String newStrA = removeSign(strA);
        String newStrB = removeSign(strB);
        int longest = Math.max(newStrA.length(), newStrB.length());
        if (longest == 0) {
            // Both inputs reduce to the same empty string. Dividing would give NaN,
            // which fails every threshold comparison, so score them as identical.
            return 1.0;
        }
        return lcs(newStrA, newStrB).length() * 1.0 / longest;
    }

    /**
     * Returns {@code str} with every non-comparable character removed.
     */
    private static String removeSign(String str) {
        StringBuilder kept = new StringBuilder(str.length());
        for (char item : str.toCharArray()) {
            if (charReg(item)) {
                kept.append(item);
            }
        }
        return kept.toString();
    }

    /**
     * Tells whether a character takes part in the comparison: a character in
     * U+4E00..U+9FA5, an ASCII letter or an ASCII digit.
     */
    private static boolean charReg(char charValue) {
        return (charValue >= 0x4E00 && charValue <= 0X9FA5)
                || (charValue >= 'a' && charValue <= 'z')
                || (charValue >= 'A' && charValue <= 'Z')
                || (charValue >= '0' && charValue <= '9');
    }

    /**
     * Returns a longest common subsequence of the two strings. Despite the
     * method name, the matched characters do not have to be contiguous.
     *
     * @param strA first string, must not be {@code null}
     * @param strB second string, must not be {@code null}
     * @return a longest common subsequence, empty when nothing is shared
     */
    public static String longestCommonSubstring(String strA, String strB) {
        char[] chars_strA = strA.toCharArray();
        char[] chars_strB = strB.toCharArray();
        int m = chars_strA.length;
        int n = chars_strB.length;
        // matrix[i][j] = LCS length of the first i chars of strA and the first j chars of strB.
        int[][] matrix = new int[m + 1][n + 1];
        for (int i = 1; i <= m; i++) {
            for (int j = 1; j <= n; j++) {
                if (chars_strA[i - 1] == chars_strB[j - 1]) {
                    matrix[i][j] = matrix[i - 1][j - 1] + 1;
                } else {
                    matrix[i][j] = Math.max(matrix[i][j - 1], matrix[i - 1][j]);
                }
            }
        }
        char[] result = new char[matrix[m][n]];
        int currentIndex = result.length - 1;
        // Walk back from the bottom-right cell. A non-zero cell implies m >= 1 and
        // n >= 1, so the m - 1 / n - 1 accesses below stay in range.
        while (matrix[m][n] != 0) {
            if (matrix[m][n] == matrix[m][n - 1]) {
                // Compare cells, not rows: matrix[n] == matrix[n - 1] was a reference
                // comparison that was never true and could index past the last row.
                n--;
            } else if (matrix[m][n] == matrix[m - 1][n]) {
                m--;
            } else {
                result[currentIndex] = chars_strA[m - 1];
                currentIndex--;
                n--;
                m--;
            }
        }
        return new String(result);
    }

    /**
     * Returns a longest common subsequence of {@code a} and {@code b}, built
     * from a table of size {@code (a.length() + 1) x (b.length() + 1)}.
     *
     * @param a first string, must not be {@code null}
     * @param b second string, must not be {@code null}
     * @return a longest common subsequence, empty when nothing is shared
     */
    public static String lcs(String a, String b) {
        int[][] lengths = new int[a.length() + 1][b.length() + 1];

        // Row 0 and column 0 stay at their default value 0.
        for (int i = 0; i < a.length(); i++) {
            for (int j = 0; j < b.length(); j++) {
                if (a.charAt(i) == b.charAt(j)) {
                    lengths[i + 1][j + 1] = lengths[i][j] + 1;
                } else {
                    lengths[i + 1][j + 1] =
                            Math.max(lengths[i + 1][j], lengths[i][j + 1]);
                }
            }
        }

        // Read the subsequence out of the table back to front, then reverse it.
        StringBuilder sb = new StringBuilder();
        int x = a.length();
        int y = b.length();
        while (x != 0 && y != 0) {
            if (lengths[x][y] == lengths[x - 1][y]) {
                x--;
            } else if (lengths[x][y] == lengths[x][y - 1]) {
                y--;
            } else {
                assert a.charAt(x - 1) == b.charAt(y - 1);
                sb.append(a.charAt(x - 1));
                x--;
                y--;
            }
        }

        return sb.reverse().toString();
    }

    /**
     * Returns the longest common prefix of {@code a} and {@code b}.
     *
     * @param a first string, must not be {@code null}
     * @param b second string, must not be {@code null}
     * @return the shared leading characters, possibly empty
     */
    public static String lcps(String a, String b) {
        int limit = Math.min(a.length(), b.length());
        int shared = 0;
        while (shared < limit && a.charAt(shared) == b.charAt(shared)) {
            shared++;
        }
        return a.substring(0, shared);
    }

    /**
     * Returns the longest common suffix of {@code a} and {@code b}.
     *
     * @param a first string, must not be {@code null}
     * @param b second string, must not be {@code null}
     * @return the shared trailing characters, possibly empty
     */
    public static String lcss(String a, String b) {
        int length1 = a.length();
        int length2 = b.length();
        int limit = Math.min(length1, length2);
        int shared = 0;
        while (shared < limit
                && a.charAt(length1 - 1 - shared) == b.charAt(length2 - 1 - shared)) {
            shared++;
        }
        return a.substring(length1 - shared);
    }

}