[Aligner] `normalize_set` handles tuples. (closes #117136)

author: Simon Chabot <simon.chabot@logilab.fr>
changeset: 6d80b4e863f3
branch: default
phase: public
hidden: no
parent revision: #e5f1e678e654 [doc] Little explanation on alignall_iterative() (closes #116943)
child revision: #db9a8b3f6f16 [doc] Use sphinx roles and update the sample code (closes #119623)
files modified by this revision
aligner.py
test/test_alignment.py
# HG changeset patch
# User Simon Chabot <simon.chabot@logilab.fr>
# Date 1359114113 -3600
# Fri Jan 25 12:41:53 2013 +0100
# Node ID 6d80b4e863f34bc7e889bbf5dad928d0a3f0b878
# Parent e5f1e678e6546c0d2a2bfc9a50e21f15504f4092
[Aligner] `normalize_set` handles tuples. (closes #117136)

diff --git a/aligner.py b/aligner.py
@@ -29,11 +29,14 @@
1  import nazca.matrix as m
2 
3 
4  def normalize_set(rset, treatments):
5      """ Apply all the normalization functions to the given rset """
6 +    normalized_set = []
7      for row in rset:
8 +        row = list(row)
9 +        normalized_set.append(row)
10          for ind, attribut in enumerate(row):
11              treat = treatments.get(ind)
12              if not attribut or not treat:
13                  continue
14              for f in treat.get('normalization', []):
@@ -42,11 +45,11 @@
15                  # provided ones
16                  givenargs = dict((arg, treat['norm_params'][arg])
17                                   for arg in farg if arg in treat.get('norm_params', []))
18                  attribut = f(attribut, **givenargs)
19              row[ind] = attribut
20 -    return rset
21 +    return normalized_set
22 
23  def findneighbours_kdtree(alignset, targetset, indexes=(1, 1), threshold=0.1):
24      """ Find the neigbhours using kdree
25      """
26      #If an element is None (missing), use instead the identity element.
diff --git a/test/test_alignment.py b/test/test_alignment.py
@@ -307,10 +307,33 @@
27                       ['T3', 'labelt3', (6.25, 48.91)],
28                       ]
29          neighbours = alig.findneighbours_kdtree(alignset, targetset, indexes=(2, 2), threshold=0.3)
30          self.assertEqual(neighbours, [[[0], [0, 2]], [[1], [0, 2]], [[2], [1]], [[3], [1]]])
31 
32 +    def test_normalize_set(self):
33 +        treatments = {1: {'normalization': [simplify,]}}
34 +
35 +        alignlist = [['Label1', u"Un nuage flotta dans le grand ciel bleu."],
36 +                     ['Label2', u"Pour quelle occasion vous êtes-vous apprêtée ?"],
37 +                     ['Label3', u"Je les vis ensemble à plusieurs occasions."],
38 +                     ['Label4', u"Je n'aime pas ce genre de bandes dessinées tristes."],
39 +                     ['Label5', u"Ensemble et à plusieurs occasions, je les vis."],
40 +                    ]
41 +        aligntuple = [tuple(l) for l in alignlist]
42 +
43 +        normalizedlist = alig.normalize_set(alignlist, treatments)
44 +        normalizedtuple = alig.normalize_set(aligntuple, treatments)
45 +
46 +        self.assertListEqual(normalizedlist, normalizedtuple)
47 +        self.assertListEqual(normalizedlist,
48 +                        [['Label1', u"nuage flotta grand ciel bleu"],
49 +                         ['Label2', u"occasion êtes apprêtée"],
50 +                         ['Label3', u"vis ensemble à plusieurs occasions"],
51 +                         ['Label4', u"n aime genre bandes dessinées tristes"],
52 +                         ['Label5', u"ensemble à plusieurs occasions vis"],
53 +                        ])
54 +
55      def test_findneighbours_minhashing(self):
56          lemmas = loadlemmas(path.join(TESTDIR, 'data', 'french_lemmas.txt'))
57          treatments = {2: {'normalization': [simplify,], 'norm_params': {'lemmas': lemmas}}}
58          alignset = [['V1', 'label1', u"Un nuage flotta dans le grand ciel bleu."],
59                      ['V2', 'label2', u"Pour quelle occasion vous êtes-vous apprêtée ?"],