Skip to content

Commit a5ff3e0

Browse files
committed
Added more examples
1 parent 5c47336 commit a5ff3e0

File tree

2 files changed

+105
-0
lines changed

2 files changed

+105
-0
lines changed
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
* The MIT License
3+
*
4+
* Copyright 2015 Thibault Debatty.
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in
14+
* all copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
* THE SOFTWARE.
23+
*/
24+
25+
package info.debatty.java.stringsimilarity;
26+
27+
import java.util.ArrayList;
28+
29+
/**
30+
*
31+
* @author tibo
32+
*/
33+
public class Main {
34+
35+
/**
36+
* @param args the command line arguments
37+
*/
38+
public static void main(String[] args) {
39+
ArrayList<StringSimilarityInterface> similarities = new ArrayList<StringSimilarityInterface>();
40+
similarities.add(new JaroWinkler());
41+
similarities.add(new Levenshtein());
42+
similarities.add(new LongestCommonSubsequence());
43+
similarities.add(new NGram(4));
44+
similarities.add(new QGram(4));
45+
46+
ArrayList<Pair> pairs = new ArrayList<Pair>();
47+
// adjacent letters switch
48+
pairs.add(new Pair("ABCDEFGHIJ", "ABDCEFGHIJ"));
49+
50+
// adjacent letters switch, with mixed cases
51+
pairs.add(new Pair("abcdefghij", "ABDCEFGHIJ"));
52+
53+
// close letters switch
54+
pairs.add(new Pair("ABCDEFGHIJ", "ABFDECGHIJ"));
55+
56+
// 2 blocks switch
57+
pairs.add(new Pair("ABCDEFVWXYZ", "VWXYZABCDEF"));
58+
59+
// block switch in a long sequence
60+
pairs.add(new Pair(
61+
"Lorem ipsum dolor sit amet, MY BLOCK IS HERE consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
62+
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, MY BLOCK IS HERE sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."));
63+
64+
// Semanticly similar
65+
pairs.add(new Pair(
66+
"Mon automobile est en panne",
67+
"Ma voiture est cassée"
68+
));
69+
70+
// multiple blocks switch
71+
pairs.add(new Pair("AZERPOIUQSDFMLKJ", "QSDFMLKJAZERPOIU"));
72+
73+
// completely different...
74+
pairs.add(new Pair("AZERTYUIOP", "QSDFGHJKLM"));
75+
76+
77+
for(Pair pair: pairs) {
78+
System.out.println("\n" + pair._1 + " <> " + pair._2);
79+
for (StringSimilarityInterface similarity: similarities) {
80+
System.out.printf("%-25s : %f\n",
81+
similarity.getClass().getSimpleName(),
82+
similarity.similarity(pair._1, pair._2));
83+
}
84+
}
85+
}
86+
87+
}
88+
89+
/**
 * Simple holder for two strings that are compared with each other.
 */
class Pair {

    /** First string of the pair. */
    public String _1;

    /** Second string of the pair. */
    public String _2;

    /**
     * Stores both strings as given.
     *
     * @param s1 the first string
     * @param s2 the second string
     */
    public Pair(String s1, String s2) {
        this._1 = s1;
        this._2 = s2;
    }
}

src/main/java/info/debatty/java/stringsimilarity/QGram.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,14 @@ public static void main(String[] args) {
1818

1919
// Should be 0.5 (2 / 4)
2020
System.out.println(dig.distance("ABCD", "ABCE"));
21+
22+
// AB BC CD DE BX XB CE
23+
// 2 / 7
24+
System.out.println(dig.similarity("ABCDE", "ABXBCE"));
25+
26+
System.out.println(dig.similarity(
27+
"High Qua1ityMedications Discount On All Reorders = Best Deal Ever! Viagra50/100mg - $1.85 071",
28+
"High Qua1ityMedications Discount On All Reorders = Best Deal Ever! Viagra50/100mg - $1.85 7z3"));
2129
}
2230

2331
private int n;

0 commit comments

Comments
 (0)