Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -460,8 +460,16 @@ private Set<String> getSubsumers(ConcRel concept,
}
}
}
if (calcDepth)
depthArray[concept.getNodeIndex()] = (short) (parentMaxDepth + 1);

// Compute the current depth of the concept node
if (calcDepth) {
// The dummy root concept (C0000000) is assigned depth 0
if ( concept.getConceptID().contentEquals("C0000000") ) {
depthArray[concept.getNodeIndex()] = (short) (0);
} else {
depthArray[concept.getNodeIndex()] = (short) (parentMaxDepth + 1);
}
}
// add the concept itself to the set of subsumers
subsumers.add(concept.getConceptID());
// add this to the cache - copy the key so that this can be gc'ed as
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,33 +24,43 @@
* compute intrinsic LCH as in eqn 28 from
* http://dx.doi.org/10.1016/j.jbi.2011.03.013
*
* Scale to unit interval
* This version is NOT scaled to the unit interval
*
* @author vijay
*
*/
public class IntrinsicLCHMetric extends BaseSimilarityMetric {
double logMaxIC2 = 0d;

public IntrinsicLCHMetric(ConceptSimilarityService simSvc, Double maxIC) {
super(simSvc);
if (maxIC != null)
this.logMaxIC2 = Math.log(2 * maxIC.doubleValue()) + 1d;
}

double maxIC2 = 0d;

@Override
public double similarity(String concept1, String concept2,
Map<String, Double> conceptFilter, SimilarityInfo simInfo) {
double sim = 0d;
if (logMaxIC2 != 0d) {

if (maxIC2 != 0d) {

double ic1 = simSvc.getIC(concept1, true);
double ic2 = simSvc.getIC(concept2, true);
double lcsIC = initLcsIC(concept1, concept2, conceptFilter,
simInfo, true);
sim = 1 - (Math.log(ic1 + ic2 - 2 * (lcsIC) + 1) / logMaxIC2);


// Compute the Intrinsic LCH metric
double sim = Math.log( (ic1 + ic2 - (2d * lcsIC) + 1d) / maxIC2) * -1.0d;
return sim;

}
return sim;
return 0d;

}

public IntrinsicLCHMetric(ConceptSimilarityService simSvc, Double maxIC) {
super(simSvc);
if (maxIC != null) {
// Compute the denominator of the Intrinsic LCH metric
this.maxIC2 = 2.0d * maxIC.doubleValue();
}
}



}
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,15 @@ public double similarity(String concept1, String concept2,
return 0d;
double ic1 = simSvc.getIC(concept1, true);
double ic2 = simSvc.getIC(concept2, true);
return lcsIC / (ic1 + ic2 - lcsIC);

//
// Test that we get a positive denominator
//
if ( ic1 + ic2 > lcsIC ) {
return lcsIC / (ic1 + ic2 - lcsIC);
} else {
return 0d;
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,36 @@

import java.util.Map;

/**
*
* This metric is an implementation of the semantic relatedness measure described
* by Leacock and Chodorow (1998).
*
* See reference paper: https://aclanthology.org/J06-1003.pdf
* Page 19, Sec 2.5.3 (7)
*
* sim(c1,c2) = -log( len(c1,c2) / (2 * max_depth) )
*
*/
public class LCHMetric extends BaseSimilarityMetric {
/**
* log(max depth * 2)
* natural log(max depth * 2)
*/
double logdm = 0d;
double maxDepth = 0d;

@Override
public double similarity(String concept1, String concept2,
Map<String, Double> conceptFilter, SimilarityInfo simInfo) {
if (logdm != 0d) {
if (maxDepth != 0d) {
initLCSes(concept1, concept2, simInfo);
if (simInfo.getLcsDist() > 0) {
// double lch = logdm - Math.log((double) simInfo.getLcsDist());
// // scale to depth
// return lch / logdm;
return 1 - (Math.log((double) simInfo.getLcsDist()) / logdm);

double length = simInfo.getLcsDist();

// Compute the LCH score from the path length: -log(length / (2 * maxDepth))
double lch = Math.log(length / (double)(2 * maxDepth)) * -1.0d;
return lch;

}
}
return 0d;
Expand All @@ -44,7 +58,10 @@ public double similarity(String concept1, String concept2,
public LCHMetric(ConceptSimilarityService simSvc, Integer maxDepth) {
super(simSvc);
if (maxDepth != null) {
this.logdm = Math.log(2 * maxDepth);
// The cTakes YTEX concept graph adds a dummy root node (C0000000),
// so the supplied max depth is reduced by 1 to get the max depth
// of the real hierarchy
this.maxDepth = maxDepth - 1;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,30 +46,39 @@ public void setIntrinsicIC(boolean intrinsicIC) {
@Override
public double similarity(String concept1, String concept2,
Map<String, Double> conceptFilter, SimilarityInfo simInfo) {
// don't bother if the concept graph is null

// Test that there is a valid concept graph
if (!validCG)
return 0d;
// get lcs
double lcsIC = initLcsIC(concept1, concept2, conceptFilter, simInfo,
this.intrinsicIC);
if (lcsIC == 0d) {
return 0d;
}
// get ic of concepts

// Compute the IC values for each concept
double ic1 = simSvc.getIC(concept1, this.intrinsicIC);
double ic2 = simSvc.getIC(concept2, this.intrinsicIC);

// Get the LCS with the lowest IC score
double lcsIC = initLcsIC(concept1, concept2, conceptFilter, simInfo,
this.intrinsicIC);

// if the corpus IC is 0 and the concept is not the root, then we don't
// have any IC on the concept and can't measure similarity - return 0
if (!intrinsicIC && ic1 == 0 && !rootConcept.equals(concept1))
return 0d;

if (!intrinsicIC && ic2 == 0 && !rootConcept.equals(concept2))
return 0d;
double denom = ic1 + ic2;
if (denom == 0)
return 0d;
return 2 * lcsIC / denom;

// Compute the Lin score
double sim = (2d * lcsIC) / ( ic1 + ic2 );
return sim;

}

/**
* This constructor allows us to specify if we want the standard Lin
* metric or the Intrinsic Lin by passing a boolean flag
* @param simSvc
* @param intrinsicIC if true, then compute the intrinsic Lin metric
*/
public LinMetric(ConceptSimilarityService simSvc, boolean intrinsicIC) {
super(simSvc);
this.intrinsicIC = intrinsicIC;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,43 @@

import java.util.Map;

/**
* Wu-Palmer metric matches results as found in the CPAN UMLS-Similarity::wup module
*
* @author vijay
* @author painter
*
*/
public class WuPalmerMetric extends BaseSimilarityMetric {
@Override
public double similarity(String concept1, String concept2,
Map<String, Double> conceptFilter, SimilarityInfo simInfo) {
initLCSes(concept1, concept2, simInfo);

if (simInfo.getLcses().size() > 0) {
int lcsDepth = 0;

// Test for the LCS with the greatest depth
// to find the lowest common subsumer
for (String lcs : simInfo.getLcses()) {
int d = simSvc.getDepth(lcs);

// The depth of the LCS is off by 1
int d = simSvc.getDepth(lcs) + 1;
// Find the max depth of the LCS
if (d > lcsDepth)
lcsDepth = d;
}
double lcsDepth2 = (double) (lcsDepth * 2);
return lcsDepth2 / (lcsDepth2 + (double) (simInfo.getLcsDist()-1));

//
// Compute Wu-Palmer Similarity:
//
double lcsDist = simInfo.getLcsDist().doubleValue();

// Adjust depth by 1 due to fake root node
double c1Depth = simSvc.getDepth(concept1) + 1;
double c2Depth = simSvc.getDepth(concept2) + 1;
double score = ( 2.0 * (lcsDepth) / ( c1Depth + c2Depth ) );
return score;
}
return 0d;
}
Expand Down