<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://www.aclweb.org/adminwiki/index.php?action=history&amp;feed=atom&amp;title=Doublecheck_v5.py</id>
	<title>Doublecheck v5.py - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://www.aclweb.org/adminwiki/index.php?action=history&amp;feed=atom&amp;title=Doublecheck_v5.py"/>
	<link rel="alternate" type="text/html" href="https://www.aclweb.org/adminwiki/index.php?title=Doublecheck_v5.py&amp;action=history"/>
	<updated>2026-06-26T10:35:55Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.43.6</generator>
	<entry>
		<id>https://www.aclweb.org/adminwiki/index.php?title=Doublecheck_v5.py&amp;diff=72175&amp;oldid=prev</id>
		<title>Knmnyn at 01:22, 7 September 2017</title>
		<link rel="alternate" type="text/html" href="https://www.aclweb.org/adminwiki/index.php?title=Doublecheck_v5.py&amp;diff=72175&amp;oldid=prev"/>
		<updated>2017-09-07T01:22:27Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 01:22, 7 September 2017&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l3&quot;&gt;Line 3:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 3:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;# V5: Added a bit of documentation and provenance.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;# V5: Added a bit of documentation and provenance.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;#&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;#&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;# If reading on the ACL AdminWiki you may have to view the source material to see its proper formatting.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;# Originally received by ACL 2017 PC co-chairs from ICML 2017 PC co-chairs Doina Precup and Yee Whye Teh&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;# Originally received by ACL 2017 PC co-chairs from ICML 2017 PC co-chairs Doina Precup and Yee Whye Teh&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;# Use the global variables immediately after the import to change the .csv files to be checked and to tune the similarity threshold to report for manual review.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;# Use the global variables immediately after the import to change the .csv files to be checked and to tune the similarity threshold to report for manual review.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Knmnyn</name></author>
	</entry>
	<entry>
		<id>https://www.aclweb.org/adminwiki/index.php?title=Doublecheck_v5.py&amp;diff=72174&amp;oldid=prev</id>
		<title>Knmnyn: Created page with &quot;#!/usr/bin/env python # Submission Similarity Checker (v5) # V5: Added a bit of documentation and provenance. # # Originally received by ACL 2017 PC co-chairs from ICML 2017 P...&quot;</title>
		<link rel="alternate" type="text/html" href="https://www.aclweb.org/adminwiki/index.php?title=Doublecheck_v5.py&amp;diff=72174&amp;oldid=prev"/>
		<updated>2017-09-07T01:21:11Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;#!/usr/bin/env python # Submission Similarity Checker (v5) # V5: Added a bit of documentation and provenance. # # Originally received by ACL 2017 PC co-chairs from ICML 2017 P...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;#!/usr/bin/env python&lt;br /&gt;
# Submission Similarity Checker (v5)&lt;br /&gt;
# V5: Added a bit of documentation and provenance.&lt;br /&gt;
#&lt;br /&gt;
# Originally received by ACL 2017 PC co-chairs from ICML 2017 PC co-chairs Doina Precup and Yee Whye Teh&lt;br /&gt;
# Use the global variables immediately after the import to change the .csv files to be checked and to tune the similarity threshold to report for manual review.&lt;br /&gt;
# The CSV files should have the format &amp;quot;ID&amp;quot;, &amp;quot;Title&amp;quot;, &amp;quot;Abstract&amp;quot; and an optional &amp;quot;Author&amp;quot;&lt;br /&gt;
import argparse&lt;br /&gt;
import csv&lt;br /&gt;
import copy&lt;br /&gt;
import math&lt;br /&gt;
from operator import itemgetter&lt;br /&gt;
&lt;br /&gt;
# Tunable parameters&lt;br /&gt;
ABSTR_1 = &amp;#039;IROS14.csv&amp;#039; # First source file&lt;br /&gt;
ABSTR_2 = &amp;#039;RSS14.csv&amp;#039; # Second source file&lt;br /&gt;
THR = 0.4 # cosine similarity threshold&lt;br /&gt;
&lt;br /&gt;
#~ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #&lt;br /&gt;
#~ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #&lt;br /&gt;
&lt;br /&gt;
def distentry(one_entry, two_entry):&lt;br /&gt;
&lt;br /&gt;
	ssum = 0&lt;br /&gt;
	for w in one_entry[&amp;#039;bow&amp;#039;]:	&lt;br /&gt;
		if(w in two_entry[&amp;#039;bow&amp;#039;]):&lt;br /&gt;
			ssum += one_entry[&amp;#039;bow&amp;#039;][w] * two_entry[&amp;#039;bow&amp;#039;][w]&lt;br /&gt;
	return(ssum)&lt;br /&gt;
		&lt;br /&gt;
&lt;br /&gt;
#~ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #&lt;br /&gt;
#~ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #&lt;br /&gt;
&lt;br /&gt;
def do_tfidf(corpora):&lt;br /&gt;
	&lt;br /&gt;
	N = len(corpora)&lt;br /&gt;
	&lt;br /&gt;
	#~ dictionary of all terms in the corpora	&lt;br /&gt;
	for c in corpora:&lt;br /&gt;
		tf = {}&lt;br /&gt;
		words_abstr = c[&amp;#039;abstract&amp;#039;].split()&lt;br /&gt;
		&lt;br /&gt;
		for w in words_abstr:&lt;br /&gt;
			w = w.replace(&amp;#039;,&amp;#039;,&amp;#039;&amp;#039;).replace(&amp;#039;.&amp;#039;,&amp;#039;&amp;#039;).replace(&amp;#039;)&amp;#039;,&amp;#039;&amp;#039;).replace(&amp;#039;(&amp;#039;,&amp;#039;&amp;#039;).replace(&amp;#039;;&amp;#039;,&amp;#039;&amp;#039;).replace(&amp;#039;:&amp;#039;,&amp;#039;&amp;#039;)&lt;br /&gt;
			if(w not in tf):&lt;br /&gt;
				tf[w] = 0&lt;br /&gt;
			tf[w] += 1&lt;br /&gt;
		c[&amp;#039;tf&amp;#039;] = tf&lt;br /&gt;
	&lt;br /&gt;
	idf = {}&lt;br /&gt;
	for c in corpora:&lt;br /&gt;
		for w in c[&amp;#039;tf&amp;#039;]:&lt;br /&gt;
			if(w not in idf):&lt;br /&gt;
				idf[w] = 0				&lt;br /&gt;
			idf[w] += 1&lt;br /&gt;
	&lt;br /&gt;
	sanity1 = 0&lt;br /&gt;
	for w in idf:&lt;br /&gt;
		if(idf[w] &amp;gt; N):&lt;br /&gt;
			print (&amp;#039;ASSERT FAILED!&amp;#039;)&lt;br /&gt;
			exit(1)&lt;br /&gt;
	&lt;br /&gt;
	for w in idf:&lt;br /&gt;
		idf[w] = math.log(float(N) / (1.0 + float(idf[w])))&lt;br /&gt;
		&lt;br /&gt;
	#~ TFIDF	&lt;br /&gt;
	for c in corpora:&lt;br /&gt;
		bow = {}&lt;br /&gt;
		norm1 = 0&lt;br /&gt;
		for w in c[&amp;#039;tf&amp;#039;]:		&lt;br /&gt;
			if(w not in bow):&lt;br /&gt;
				bow[w] =0&lt;br /&gt;
			bow[w] = c[&amp;#039;tf&amp;#039;][w] * idf[w]&lt;br /&gt;
			norm1 += bow[w] * bow[w]&lt;br /&gt;
&lt;br /&gt;
		for w in c[&amp;#039;tf&amp;#039;]:&lt;br /&gt;
			bow[w] /= math.sqrt(norm1)&lt;br /&gt;
		&lt;br /&gt;
		c[&amp;#039;bow&amp;#039;] = bow&lt;br /&gt;
				&lt;br /&gt;
		&lt;br /&gt;
	return(corpora)	&lt;br /&gt;
                   &lt;br /&gt;
&lt;br /&gt;
#~ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #&lt;br /&gt;
#~ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #&lt;br /&gt;
&lt;br /&gt;
def loadf_conf(fname):&lt;br /&gt;
	&lt;br /&gt;
	f=open(fname,&amp;#039;rU&amp;#039;) # opens file for reading&lt;br /&gt;
	reader = csv.reader(f,delimiter =&amp;quot;,&amp;quot;)&lt;br /&gt;
	&lt;br /&gt;
	&lt;br /&gt;
	info = []&lt;br /&gt;
	counter = 0&lt;br /&gt;
	for line in reader:&lt;br /&gt;
		if(len(line) &amp;gt;= 3):&lt;br /&gt;
			if(line[0].isdigit()):&lt;br /&gt;
				onestr = {}&lt;br /&gt;
				onestr[&amp;#039;id&amp;#039;] = int(line[0])&lt;br /&gt;
				onestr[&amp;#039;title&amp;#039;] = line[1]&lt;br /&gt;
				onestr[&amp;#039;abstract&amp;#039;] = line[2]&lt;br /&gt;
                                if(len(line) == 4):&lt;br /&gt;
                                    onestr[&amp;#039;authors&amp;#039;] = line[3]&lt;br /&gt;
                                else:&lt;br /&gt;
                                    onestr[&amp;#039;authors&amp;#039;] = &amp;#039;&amp;#039;&lt;br /&gt;
				info.append(onestr)&lt;br /&gt;
&lt;br /&gt;
	return(info)&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
#~ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #&lt;br /&gt;
#~ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
parser = argparse.ArgumentParser(description=&amp;#039;Doublesubmission checker 0.01&amp;#039;)&lt;br /&gt;
parser.add_argument(&amp;quot;c1&amp;quot;,   help=&amp;quot;CSV of conference1&amp;quot;)&lt;br /&gt;
parser.add_argument(&amp;quot;c2&amp;quot;,   help=&amp;quot;CSV of conference2&amp;quot;)&lt;br /&gt;
parser.add_argument(&amp;quot;-t&amp;quot;,   help=&amp;quot;detection threshold [default: %g]&amp;quot;%THR, default=THR, required=False)&lt;br /&gt;
args = vars(parser.parse_args())&lt;br /&gt;
f1 = args[&amp;#039;c1&amp;#039;]&lt;br /&gt;
f2 = args[&amp;#039;c2&amp;#039;]&lt;br /&gt;
self_comparison = (f1 == f2)&lt;br /&gt;
print (&amp;#039;Self comparison: &amp;#039; + str(self_comparison))&lt;br /&gt;
thr = float(args[&amp;#039;t&amp;#039;])&lt;br /&gt;
print (&amp;#039;## Double submission checker v 0.02. L. Spinello, G.D. Tipaldi (slight modifications by W. Burgard) 2014&amp;#039;)&lt;br /&gt;
print (&amp;#039;&amp;gt; Detection threshold set to &amp;#039;+str(thr))&lt;br /&gt;
print (&amp;#039;&amp;gt; Load file &amp;#039; + f1)&lt;br /&gt;
iros_raw = loadf_conf(f1)&lt;br /&gt;
print (&amp;#039;entries: &amp;#039; + str(len(iros_raw)))&lt;br /&gt;
print (&amp;#039;&amp;gt; Load file &amp;#039; + f2)&lt;br /&gt;
rss_raw = loadf_conf(f2)&lt;br /&gt;
print (&amp;#039;entries: &amp;#039; + str(len(rss_raw)))&lt;br /&gt;
rss_len = len(rss_raw)&lt;br /&gt;
corpora_t = copy.deepcopy(rss_raw)&lt;br /&gt;
for ir in iros_raw:	&lt;br /&gt;
	corpora_t.append(ir)&lt;br /&gt;
print (&amp;#039;&amp;gt; Compute TFIDF&amp;#039;)&lt;br /&gt;
corpora = do_tfidf(corpora_t)&lt;br /&gt;
&lt;br /&gt;
bestv = 0&lt;br /&gt;
wmatches = []&lt;br /&gt;
for i in range(rss_len):&lt;br /&gt;
	bestmatch = -1&lt;br /&gt;
	bestmatch_idx = -1&lt;br /&gt;
	for j in range(rss_len+1,len(corpora)):&lt;br /&gt;
         if((i != (j-rss_len)) or (not self_comparison)):&lt;br /&gt;
            d = distentry(corpora[i], corpora[j])&lt;br /&gt;
            if(d &amp;gt; bestmatch):&lt;br /&gt;
                bestmatch = d&lt;br /&gt;
                bestmatch_idx = j&lt;br /&gt;
&lt;br /&gt;
	if(bestmatch &amp;gt; thr):&lt;br /&gt;
		onematch = {}&lt;br /&gt;
		onematch[&amp;#039;from_id&amp;#039;] = i&lt;br /&gt;
		onematch[&amp;#039;to_id&amp;#039;] = bestmatch_idx&lt;br /&gt;
		onematch[&amp;#039;to_value&amp;#039;] = bestmatch&lt;br /&gt;
		wmatches.append(onematch)&lt;br /&gt;
&lt;br /&gt;
#~ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #&lt;br /&gt;
#~ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #&lt;br /&gt;
swmatches = sorted(wmatches, key=itemgetter(&amp;#039;to_value&amp;#039;), reverse = True) &lt;br /&gt;
&lt;br /&gt;
for wins in swmatches:&lt;br /&gt;
	print (&amp;#039;====================================================&amp;#039;)&lt;br /&gt;
	cid1 = wins[&amp;#039;to_id&amp;#039;]&lt;br /&gt;
	cid2 = wins[&amp;#039;from_id&amp;#039;]&lt;br /&gt;
        print (f1 + &amp;#039; id &amp;#039;+ str(corpora[cid1][&amp;#039;id&amp;#039;]) + &amp;#039;, Title: &amp;#039; + corpora[cid1][&amp;#039;title&amp;#039;] + &amp;#039;\nAuthors: &amp;#039; + corpora[cid1][&amp;#039;authors&amp;#039;] )&lt;br /&gt;
        print (f2 + &amp;#039; id &amp;#039;+ str(corpora[cid2][&amp;#039;id&amp;#039;]) + &amp;#039;, Title: &amp;#039; + corpora[cid2][&amp;#039;title&amp;#039;] + &amp;#039;\nAuthors: &amp;#039; + corpora[cid2][&amp;#039;authors&amp;#039;] )&lt;br /&gt;
	print (&amp;#039;Cosine dist: &amp;#039; + str(wins[&amp;#039;to_value&amp;#039;]))&lt;br /&gt;
	print (&amp;#039;&amp;#039;)&lt;br /&gt;
	print (&amp;#039;abstr &amp;#039; + f1 + &amp;quot;: &amp;quot; + corpora[cid1][&amp;#039;abstract&amp;#039;])&lt;br /&gt;
	print (&amp;#039;----&amp;#039;)&lt;br /&gt;
	print (&amp;#039;abstr &amp;#039; + f2 + &amp;quot;: &amp;quot; + corpora[cid2][&amp;#039;abstract&amp;#039;])&lt;br /&gt;
	print (&amp;#039;&amp;#039;)&lt;br /&gt;
	print (&amp;#039;&amp;#039;)&lt;/div&gt;</summary>
		<author><name>Knmnyn</name></author>
	</entry>
</feed>