[utils] Create an utils folder, related to #187461

author: vincent.michel@logilab.fr
changeset: 7019bc0cab44
branch: default
phase: public
hidden: no
parent revision: #61a56bf04d36 [ner] Remove unused files and move tests, related to #187461
child revision: #4a79af983c29 Rename ner in named_entities, related to #187461
files modified by this revision:
dataio.py
distances.py
minhashing.py
ner/dataio.py
normalize.py
record_linkage/aligner.py
record_linkage/blocking.py
test/test_alignment.py
test/test_blocking.py
test/test_dataio.py
test/test_distances.py
test/test_minhashing.py
test/test_normalize.py
utils/__init__.py
utils/dataio.py
utils/distances.py
utils/minhashing.py
utils/ner_dataio.py
utils/normalize.py
# HG changeset patch
# User vincent.michel@logilab.fr
# Date 1387464298 0
# Thu Dec 19 14:44:58 2013 +0000
# Node ID 7019bc0cab44ca1ab8c712684a5c331c3d0bff33
# Parent 61a56bf04d367baa4564f43e14612b7131ace3bf
[utils] Create an utils folder, related to #187461

diff --git a/record_linkage/aligner.py b/record_linkage/aligner.py
@@ -19,11 +19,11 @@
1  from collections import defaultdict
2 
3  from scipy import zeros
4  from scipy.sparse import lil_matrix
5 
6 -from nazca.dataio import parsefile
7 +from nazca.utils.dataio import parsefile
8 
9 
10  ###############################################################################
11  ### UTILITY FUNCTIONS #########################################################
12  ###############################################################################
diff --git a/record_linkage/blocking.py b/record_linkage/blocking.py
@@ -30,12 +30,12 @@
13  from functools import partial
14  import warnings
15 
16  from scipy.spatial import KDTree
17 
18 -from nazca.minhashing import Minlsh
19 -from nazca.distances import soundexcode
20 +from nazca.utils.minhashing import Minlsh
21 +from nazca.utils.distances import soundexcode
22 
23 
24  ###############################################################################
25  ### GENERAL BLOCKING ##########################################################
26  ###############################################################################
diff --git a/test/test_alignment.py b/test/test_alignment.py
@@ -19,14 +19,14 @@
27  import unittest2
28  import random
29  random.seed(6) ### Make sure tests are repeatable
30  from os import path
31 
32 -from nazca.normalize import simplify
33 +from nazca.utils.normalize import simplify
34  import nazca.record_linkage.aligner as alig
35  import nazca.record_linkage.blocking as blo
36 -from nazca.distances import LevenshteinProcessing, GeographicalProcessing
37 +from nazca.utils.distances import LevenshteinProcessing, GeographicalProcessing
38 
39 
40  TESTDIR = path.dirname(__file__)
41 
42 
diff --git a/test/test_blocking.py b/test/test_blocking.py
@@ -19,18 +19,18 @@
43  from os import path
44  from functools import partial
45  import random
46  random.seed(6) ### Make sure tests are repeatable / Minhashing
47 
48 -from nazca.distances import (levenshtein, soundex, soundexcode,   \
49 -                             jaccard, euclidean, geographical)
50 +from nazca.utils.distances import (levenshtein, soundex, soundexcode,   \
51 +                                       jaccard, euclidean, geographical)
52  from nazca.record_linkage.blocking import (KeyBlocking, SortedNeighborhoodBlocking,
53                                             MergeBlocking,
54                                             NGramBlocking, PipelineBlocking,
55                                             SoundexBlocking, KmeansBlocking,
56                                             MinHashingBlocking, KdTreeBlocking)
57 -from nazca.normalize import SimplifyNormalizer, loadlemmas
58 +from nazca.utils.normalize import SimplifyNormalizer, loadlemmas
59 
60 
61  TESTDIR = path.dirname(__file__)
62 
63  SOUNDEX_REFSET = (('a1', 'smith'),
diff --git a/test/test_dataio.py b/test/test_dataio.py
@@ -20,11 +20,11 @@
64  import shutil
65  from contextlib import contextmanager
66  from os import path
67  from tempfile import mkdtemp
68 
69 -from nazca.dataio import sparqlquery, parsefile, autocast, split_file
70 +from nazca.utils.dataio import sparqlquery, parsefile, autocast, split_file
71 
72 
73  TESTDIR = path.dirname(__file__)
74 
75  @contextmanager
diff --git a/test/test_distances.py b/test/test_distances.py
@@ -19,13 +19,13 @@
76  import unittest2
77  import random
78  random.seed(6) ### Make sure tests are repeatable
79  from dateutil import parser as dateparser
80 
81 -from nazca.distances import (levenshtein, soundex, soundexcode,\
82 -                             jaccard, euclidean, geographical,
83 -                             LevenshteinProcessing)
84 +from nazca.utils.distances import (levenshtein, soundex, soundexcode,
85 +                                   jaccard, euclidean, geographical,
86 +                                   LevenshteinProcessing)
87 
88 
89  class DistancesTest(unittest2.TestCase):
90      def test_levenshtein(self):
91          self.assertEqual(levenshtein('niche', 'chiens'), 5)
diff --git a/test/test_minhashing.py b/test/test_minhashing.py
@@ -19,12 +19,12 @@
92  import unittest2
93  from os import path
94  import random
95  random.seed(6) ### Make sure tests are repeatable
96 
97 -from nazca.normalize import loadlemmas, simplify
98 -from nazca.minhashing import Minlsh
99 +from nazca.utils.normalize import loadlemmas, simplify
100 +from nazca.utils.minhashing import Minlsh
101 
102  TESTDIR = path.dirname(__file__)
103 
104 
105 
diff --git a/test/test_normalize.py b/test/test_normalize.py
@@ -17,16 +17,16 @@
106  # with this program. If not, see <http://www.gnu.org/licenses/>.
107 
108  import unittest2
109  from os import path
110 
111 -from nazca.normalize import (BaseNormalizer, UnicodeNormalizer, JoinNormalizer,
112 -                             SimplifyNormalizer, TokenizerNormalizer,
113 -                             LemmatizerNormalizer, RoundNormalizer,
114 -                             RegexpNormalizer, NormalizerPipeline,
115 -                             lunormalize, loadlemmas, lemmatized, \
116 -                             roundstr, rgxformat, tokenize, simplify)
117 +from nazca.utils.normalize import (BaseNormalizer, UnicodeNormalizer, JoinNormalizer,
118 +                                   SimplifyNormalizer, TokenizerNormalizer,
119 +                                   LemmatizerNormalizer, RoundNormalizer,
120 +                                   RegexpNormalizer, NormalizerPipeline,
121 +                                   lunormalize, loadlemmas, lemmatized,
122 +                                   roundstr, rgxformat, tokenize, simplify)
123 
124 
125  TESTDIR = path.dirname(__file__)
126 
127 
diff --git a/utils/__init__.py b/utils/__init__.py
diff --git a/dataio.py b/utils/dataio.py
diff --git a/distances.py b/utils/distances.py
@@ -22,11 +22,11 @@
128      DATEUTIL_ENABLED = True
129  except ImportError:
130      DATEUTIL_ENABLED = False
131  from scipy import matrix, empty
132 
133 -from nazca.normalize import tokenize
134 +from nazca.utils.normalize import tokenize
135 
136 
137  ###############################################################################
138  ### UTILITY FUNCTIONS #########################################################
139  ###############################################################################
diff --git a/minhashing.py b/utils/minhashing.py
@@ -21,11 +21,11 @@
140  from collections import defaultdict
141 
142  import numpy as np
143  from scipy.optimize import bisect
144 
145 -from nazca.normalize import iter_wordgrams
146 +from nazca.utils.normalize import iter_wordgrams
147 
148 
149  def randomhashfunction(zr):
150      """ Return a random hash function, mapping x in Z to ZR
151          h:x -> ax + b mod R
diff --git a/ner/dataio.py b/utils/ner_dataio.py
diff --git a/normalize.py b/utils/normalize.py