-
Notifications
You must be signed in to change notification settings - Fork 43
/
hayaku_probe.py
419 lines (348 loc) · 15.9 KB
/
hayaku_probe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
# -*- coding: utf-8 -*-
# (c) 2012 Sergey Mezentsev
import os
import re
from itertools import product, chain
def import_dir(name, fromlist=()):
    """Import module ``name`` from the package directory holding this file.

    The package name is the base name of the directory that contains this
    file (after resolving symlinks); a trailing ``.sublime-package`` suffix
    is removed from it.  The result of ``__import__`` is returned as is.
    """
    package_ext = '.sublime-package'
    containing_dir = os.path.dirname(os.path.realpath(__file__))
    package = os.path.basename(containing_dir)
    if package.endswith(package_ext):
        package = package[:len(package) - len(package_ext)]
    qualified_name = '{0}.{1}'.format(package, name)
    return __import__(qualified_name, fromlist=fromlist)
# Load the dictionary driver through the enclosing package directory first
# (see import_dir); if that raises ImportError, fall back to importing
# ``hayaku_dict_driver`` as a plain top-level module.
try:
    imp = import_dir('hayaku_dict_driver', ('css_defaults', 'get_css_dict', 'get_flat_css', 'css_flat_list'))
    # Re-export the driver functions under module-level names.
    css_defaults = imp.css_defaults
    get_css_dict = imp.get_css_dict
    get_flat_css = imp.get_flat_css
    css_flat_list = imp.css_flat_list
except ImportError:
    from hayaku_dict_driver import css_defaults, get_css_dict, get_flat_css, css_flat_list
# TODO: Move this to dicts etc.
# Tie-breaker order for hayaku_extract(): when several candidates remain,
# the one whose property appears earliest in this list is chosen.
PRIORITY_PROPERTIES = [
    'display', 'color', 'margin', 'position', 'padding', 'width', 'background',
    'zoom', 'height', 'top', 'vertical-align', 'overflow', 'left', 'margin-right',
    'float', 'margin-left', 'cursor', 'text-decoration', 'font-size', 'margin-top', 'border',
    'background-position', 'font', 'margin-bottom', 'padding-left', 'right', 'padding-right', 'line-height',
    'white-space', 'text-align', 'border-color', 'padding-top', 'z-index', 'border-bottom', 'visibility',
    'border-radius', 'padding-bottom', 'font-weight', 'clear', 'max-width', 'border-top', 'border-width',
    'content', 'bottom', 'background-color', 'opacity', 'background-image', 'box-shadow', 'border-collapse',
    'text-overflow', 'filter', 'border-right', 'text-indent', 'clip', 'min-width', 'min-height',
    'border-left', 'max-height', 'border-right-color', 'border-top-color', 'transition', 'resize', 'overflow-x',
    'list-style', 'word-wrap', 'border-left-color', 'word-spacing', 'background-repeat', 'user-select', 'border-bottom-color',
    'box-sizing', 'border-top-left-radius', 'font-family', 'border-bottom-width', 'outline', 'border-bottom-right-radius', 'border-right-width',
    'border-top-width', 'font-style', 'text-transform', 'border-bottom-left-radius', 'border-left-width', 'border-spacing', 'border-style',
    'border-top-right-radius', 'text-shadow', 'border-image', 'overflow-y', 'table-layout', 'background-size', 'behavior',
    'body', 'name', 'letter-spacing', 'background-clip', 'pointer-events', 'transform', 'counter-reset',
]
def get_all_properties(css_dict=None):
    """Return all property names followed by "property value" keyword pairs.

    ``css_dict`` defaults to the first element returned by get_css_dict().
    The result starts with every key of ``css_dict`` and is followed, for
    each key in turn, by ``"<key> <value>"`` strings built from
    css_flat_list(key, css_dict).  Values starting with ``<`` or ``.`` are
    left out (presumably type placeholders and unit entries -- confirm
    against the dictionary driver).
    """
    if css_dict is None:
        css_dict = get_css_dict()[0]

    property_names = list(css_dict)
    all_properties = list(property_names)

    # Extend with "property value" pairs (for example "position absolute").
    # Iterate over a snapshot of the names: the previous code extended the
    # very list it was looping over, so every generated pair was itself fed
    # back into css_flat_list() as if it were a property name.
    for prop_name in property_names:
        for item in css_flat_list(prop_name, css_dict):
            value = item[1]
            if value.startswith(('<', '.')):
                continue
            all_properties.append('{0} {1}'.format(prop_name, value))
    return all_properties
def score(a, b):
    """Rate two adjacent pieces ``a`` and ``b`` of a split.

    Each rule that applies adds its weight to the total; the total is
    returned (``0`` when no rule applies).
    """
    inner_a = a[1:-1]
    inner_b = b[1:-1]
    rules = (
        # boost a property followed by a value (they are separated by a space)
        (a.endswith(' '), 3.0),
        # penalise a hyphen strictly inside a piece (letter not on a word boundary)
        ('-' in inner_a or '-' in inner_b, -2.0),
        # penalise a space strictly inside a piece
        (' ' in inner_a or ' ' in inner_b, -0.5),
        # the next letter starts a word right after "-"
        (a.endswith('-'), 1.05),
        # consecutive letters: the left piece is a single character
        (len(a) == 1, 1.0),
        # consecutive letters: the right piece is a single character
        (len(b) == 1, 1.0),
    )
    total = 0
    # Accumulate in rule order so float rounding matches a plain sequence of +=.
    for applies, weight in rules:
        if applies:
            total += weight
    return total
def string_score(arr):
    """Return the score of a split: the sum of score() over adjacent pieces."""
    adjacent_pairs = zip(arr, arr[1:])
    return sum(score(left, right) for left, right in adjacent_pairs)
def tree(css_property, abbr):
    """Generate every split of ``css_property`` that follows ``abbr``.

    A split is a tuple of pieces whose concatenation is ``css_property``.
    The first piece starts with the first character of the property (it is
    not compared with ``abbr[0]``); every following piece ``k`` starts at a
    position where ``abbr[k]`` occurs in the lower-cased remainder.  From the
    third piece on a piece may be empty when the same position matches twice.
    For an abbreviation of at most one character the single split is
    ``(first character, rest)``.

    Example: ``('abvbc', 'abc') -> [('a', 'bvb', 'c'), ('abv', 'b', 'c')]``

    Splits in which any single piece holds more than one ``-`` are dropped.

    Returns a sorted list of tuples; the list is empty when the property is
    empty or shorter than the abbreviation.
    """
    # Always hand back a list (the short-input path used to return a set) and
    # do not index into an empty property.
    if not css_property or len(css_property) < len(abbr):
        return []

    # Partial splits: all pieces fixed so far plus the unconsumed remainder.
    partials = {(css_property[0], css_property[1:])}

    for level in range(1, len(abbr)):
        letter = abbr[level]
        extended = set()
        for partial in partials:
            head, rest = partial[:-1], partial[-1]
            lowered = rest.lower()
            pos = lowered.find(letter)
            while pos != -1:
                if level == 1:
                    # The skipped characters join the leading character, so
                    # the result still has exactly two pieces.
                    split = (head[-1] + rest[:pos], rest[pos:])
                else:
                    split = head + (rest[:pos], rest[pos:])
                extended.add(split)
                pos = lowered.find(letter, pos + 1)
        partials = extended

    # Remove splits that have two "-" inside one piece.
    return sorted(
        split for split in partials
        if all(piece.count('-') <= 1 for piece in split)
    )
def prop_value(s1, val, all_properties):
    """Yield "property value" entries matching both abbreviations.

    Only entries of ``all_properties`` that contain a space are considered.
    An entry is yielded when the letters of ``val`` occur in order in its
    value and the letters of ``s1`` occur in order in its property name
    (both compared in lower case).
    """
    for entry in all_properties:
        if ' ' in entry.strip():
            prop, value = entry.split()
            if sub_string(value.lower(), val.lower()) and sub_string(prop.lower(), s1.lower()):
                yield '{0} {1}'.format(prop, value).strip()
def sub_string(string, sub):
    """Tell whether the characters of ``sub`` occur in ``string`` in order.

    ``string`` is lower-cased before matching; ``sub`` is used as given.
    An empty ``sub`` always matches.
    """
    haystack = string.lower()
    cursor = 0
    for char in sub:
        offset = haystack[cursor:].find(char)
        if offset == -1:
            return False
        # continue searching right after the character just matched
        cursor += offset + 1
    return True
def segmentation(abbr):
    """Split an abbreviation into its components.

    Returns a dict that always holds ``'abbr'`` (the input as given) and
    ``'important'`` (True when the input ends with ``!``).  Depending on
    what is found it also holds:

    * ``'property-value'`` -- the leading property part when nothing follows it;
    * ``'property-name'`` -- the leading property part when something follows it;
    * ``'keyword-value'`` -- the remainder when value_parser() finds neither a
      colour nor a number in it (``''`` when only a ``:`` followed);
    * whatever value_parser() reports for the remainder, except its
      ``'value'`` marker, which is removed.

    An empty abbreviation, or one without a property part, yields just
    ``'abbr'`` and ``'important'``.
    """
    # Components of the abbreviation
    parts = {
        'abbr': abbr,  # TODO: drop this key, it is only used by the tests
    }

    # Check for the "important" flag.  endswith() is used instead of
    # abbr[-1] so that an empty abbreviation does not raise IndexError.
    parts['important'] = abbr.endswith('!')
    if parts['important']:
        abbr = abbr[:-1]

    # TODO: precompile the regex
    # TODO: start testing the regex
    m = re.search(r'^([a-z\$\@\+]?[a-z-]*[a-z]).*$', abbr)
    if m is None:
        # No property part found in the abbreviation
        return parts
    property_ = m.group(1)

    parts['property-value'] = property_

    # Strip the property part from the abbreviation
    abbr = abbr[len(property_):]

    if abbr:
        # Something follows, so the leading part is a property name.
        parts['property-name'] = property_
        del parts['property-value']

    # Drop the zen-style separator
    if abbr and ':' == abbr[0]:
        abbr = abbr[1:]
        if not abbr:
            # "prop:" -- a value was announced but left empty
            parts['keyword-value'] = ''

    if not abbr:
        return parts

    parts.update(value_parser(abbr))
    if 'value' in parts:
        # value_parser() sets 'value' to None as a "colour found" marker.
        assert parts['value'] is None
        del parts['value']
    elif ('type-value' not in parts and 'type-name' not in parts):
        parts['keyword-value'] = abbr

    # TODO: keep the accepted value types, e.g. parts['allow'] = ['<color_values>']
    return parts
def value_parser(abbr):
    """Detect a colour and/or a number in the value part of an abbreviation.

    Returns a dict with any of these keys:

    * ``'color'`` -- the text after a leading ``#``, or the whole value when
      it consists only of upper-case letters, digits and dots and its colour
      part parses as hexadecimal in ``0..0xFFFFFF``; in both cases the text
      stops at the first dot;
    * ``'color_alpha'`` -- the text from the first dot on (dot included),
      set together with ``'color'`` when the dot is not the first character;
    * ``'value'`` -- always ``None``; a marker set whenever ``'color'`` is set;
    * ``'type-value'`` -- the number left after stripping a trailing run of
      lower-case letters / ``%``: an ``int`` when it parses as one, else a
      ``float``;
    * ``'type-name'`` -- that stripped suffix, when there is one.

    An empty value yields an empty dict.
    """
    # TODO: support abbreviations like "w-.e", meaning "width -|em"
    parts = {}

    # Checking the color
    # Better to replace with regex to simplify it
    # dot_index stays 0 both when there is no dot and when the dot is the
    # first character; either way no alpha part is split off.
    dot_index = abbr.index('.') if '.' in abbr else 0
    # Slice up to the dot, or to the end of the string when there is none
    # (open-ended instead of a fixed upper bound, so nothing is truncated).
    color_end = dot_index or None

    # startswith() keeps an empty value from raising IndexError.
    if abbr.startswith('#'):
        parts['color'] = abbr[1:color_end]
        if dot_index:
            parts['color_alpha'] = abbr[dot_index:]
        parts['value'] = None

    try:
        if all((c.isupper() or c.isdigit() or c == '.') for c in abbr) and 0 <= int(abbr[:color_end], 16) <= 0xFFFFFF:
            parts['color'] = abbr[:color_end]
            if dot_index:
                parts['color_alpha'] = abbr[dot_index:]
            parts['value'] = None
    except ValueError:
        # not a hexadecimal number, so not a colour
        pass

    # Check for a numeric value
    val = None
    numbers = re.sub("[a-z%]+$", "", abbr)
    try:
        # float() first: a decimal such as "1.5" keeps its float value when
        # the int() conversion below fails.
        val = float(numbers)
        val = int(numbers)
    except ValueError:
        pass

    if val is not None:
        parts['type-value'] = val
        if abbr != numbers:
            parts['type-name'] = abbr[len(numbers):]

    return parts
def extract(hayaku):
    """Resolve an abbreviation to a CSS property and, if present, its value.

    ``hayaku`` is either the abbreviation string, or a dict holding the
    abbreviation under ``'abbr'`` and, under ``'options'``, the ``'dict'``
    and ``'aliases'`` to use; for a plain string both come from
    get_css_dict().

    Returns the dict produced by segmentation() with ``'property-name'`` set
    to the chosen property and, when they apply, ``'keyword-value'``,
    ``'default-value'``, ``'prefixes'`` and ``'no-unprefixed-property'``.
    Returns ``{}`` when no property matches, or when a value was typed but
    none of it is accepted by the chosen property.  Returns ``None`` when the
    abbreviation has no property part.
    """
    if isinstance(hayaku, dict):
        s1 = hayaku.get('abbr')
        css_dict = hayaku.get('options').get('dict')
        css_aliases = hayaku.get('options').get('aliases')
    else:
        s1 = hayaku
        css_dict, css_aliases = get_css_dict()
    # NOTE(review): the string below is a bare expression statement, not a
    # docstring (it is not the first statement of the function).
    """В зависимости от найденных компонент в аббревиатуре применяет функцию extract"""
    prop_iter = []
    parts = segmentation(s1)
    # abbr_value: the abbreviation carries a value part after the property
    abbr_value = False
    if 'property-name' in parts:
        if parts['important']:
            # drop the trailing "!"
            s1 = s1[:-1]
        if s1[-1] != ':' and s1 != parts['property-name']:
            abbr_value = True

    # Collect candidate properties according to the kind of value found.
    if 'color' in parts:
        prop_iter.extend(prop for prop, val in get_flat_css(css_dict) if val == '<color_values>')
    if isinstance(parts.get('type-value'), int):
        prop_iter.extend(prop for prop, val in get_flat_css(css_dict) if val == '<integer>')
    if isinstance(parts.get('type-value'), float):
        # TODO: add deg, grad, time
        prop_iter.extend(prop for prop, val in get_flat_css(css_dict) if val in ('<length>', '<number>', 'percentage'))

    # TODO: check whether this variable is always needed for the conditions below
    all_properties = get_all_properties(css_dict)
    if 'keyword-value' in parts and not parts['keyword-value']:
        # empty keyword value ("prop:"): every property is a candidate
        prop_iter.extend(all_properties)
    if 'keyword-value' in parts:
        prop_iter.extend(prop_value(parts['property-name'], parts['keyword-value'], all_properties))
    elif 'color' not in parts or 'type-value' in parts:
        prop_iter.extend(all_properties)

    if not parts or not (parts.get('property-name', '') or parts.get('property-value', '')):
        # no property part in the abbreviation
        return

    # Normalised abbreviation: "<property> <keyword value>"
    abbr = ' '.join([
        parts.get('property-name', '') or parts.get('property-value', ''),
        parts.get('keyword-value', ''),
    ])
    abbr = abbr.strip()
    # Replace the normalised abbreviation by its alias, unless the abbreviation
    # as typed has a (truthy) alias entry of its own.
    if not css_aliases.get(s1):
        abbr = css_aliases.get(abbr, abbr)
    if abbr[-1] == ':':
        abbr = abbr[:-1]

    starts_properties = []
    # TODO: rework the PAIRS mechanism
    # the constants should be moved into the css dict
    # two-letter prefixes (bd, bg, ba)
    pair = None
    for alias in css_aliases:
        # aliases ending in "..." describe a prefix of the abbreviation
        if (alias.endswith('...')) and abbr.startswith(alias[:-3]):
            pair = css_aliases.get(alias)
            break
    if pair is not None:
        starts_properties = [prop for prop in prop_iter if prop.startswith(pair) and sub_string(prop, abbr)]
    if not starts_properties:
        # fall back to candidates sharing the abbreviation's first character
        starts_properties = [prop for prop in prop_iter if prop[0] == abbr[0] and sub_string(prop, abbr)]
    if 'type-value' in parts or ('keyword-value' in parts and parts['keyword-value'] == ''):
        # a numeric or empty value: only bare properties, no "property value" pairs
        starts_properties = [i for i in starts_properties if ' ' not in i]

    property_ = hayaku_extract(abbr, starts_properties, PRIORITY_PROPERTIES, string_score)
    property_, value = property_.split(' ') if ' ' in property_ else (property_, None)
    # print property_, value
    if not property_:
        return {}
    parts['property-name'] = property_
    if value is not None:
        parts['keyword-value'] = value

    # Check that the property accepts the value
    allow_values = [val for prop, val in get_flat_css(css_dict) if prop == parts['property-name']]
    if 'color' in parts and '<color_values>' not in allow_values:
        del parts['color']
    if 'type-value' in parts and not any((t in allow_values) for t in ['<integer>', 'percentage', '<length>', '<number>', '<alphavalue>']):
        del parts['type-value']
    if 'keyword-value' in parts and parts['keyword-value'] not in allow_values:
        del parts['keyword-value']
    if all([
        'keyword-value' not in parts,
        'type-value' not in parts,
        'color' not in parts,
    ]) and abbr_value:
        # a value was typed but nothing of it survived the checks above
        return {}

    # Add the default value
    if parts['property-name'] in css_dict:
        default_value = css_defaults(parts['property-name'], css_dict)
        if default_value is not None:
            parts['default-value'] = default_value
        obj = css_dict[parts['property-name']]
        if 'prefixes' in obj:
            parts['prefixes'] = obj['prefixes']
            if 'no-unprefixed-property' in obj:
                parts['no-unprefixed-property'] = obj['no-unprefixed-property']

    if parts['abbr'] == parts.get('property-value'):
        del parts['property-value']
    return parts
def hayaku_extract(abbr, filtered, priority=None, score_func=None):
    """Pick the candidate from ``filtered`` that best matches ``abbr``.

    Every candidate is expanded into its splits with tree().  When
    ``score_func`` is given and at least one split exists, the candidates are
    narrowed to the joined splits with the highest score.  A single remaining
    candidate is returned directly; several are ranked by the position of
    their property (the text before the first space) in ``priority``, with
    unknown properties ranked after all known ones and ties broken by the
    candidate text.  Returns ``''`` when nothing remains, or when several
    remain and no ``priority`` is given.
    """
    # all possible splits of all candidates
    splits = [split for candidate in filtered for split in tree(candidate, abbr)]

    if score_func is not None:
        # rate the splits and keep those with the highest rating
        rated = [(score_func(split), split) for split in splits]
        if rated:
            best = max(rating for rating, _ in rated)
            filtered = [''.join(split) for rating, split in rated if rating == best]
            if len(filtered) == 1:
                return filtered[0]

    if len(filtered) == 1:
        return filtered[0]
    if len(filtered) < 2 or priority is None:
        return ''

    # choose by priority
    unknown_rank = len(priority) + 1
    ranked = []
    for candidate in filtered:
        prop = candidate.split(' ')[0]
        try:
            rank = priority.index(prop)
        except ValueError:
            rank = unknown_rank
        ranked.append((rank, candidate))
    return min(ranked)[1]