You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

129 lines
3.9 KiB

8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
8 years ago
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. import os
  4. import re
  5. from difflib import SequenceMatcher
  6. from autojump_utils import is_python3
  7. from autojump_utils import last
  8. if is_python3(): # pragma: no cover
  9. ifilter = filter
  10. imap = map
  11. os.getcwdu = os.getcwd
  12. else:
  13. from itertools import ifilter
  14. from itertools import imap
  15. def match_anywhere(needles, haystack, ignore_case=False):
  16. """
  17. Matches needles anywhere in the path as long as they're in the same (but
  18. not necessary consecutive) order.
  19. For example:
  20. needles = ['foo', 'baz']
  21. regex needle = r'.*foo.*baz.*'
  22. haystack = [
  23. (path='/foo/bar/baz', weight=10),
  24. (path='/baz/foo/bar', weight=10),
  25. (path='/foo/baz', weight=10),
  26. ]
  27. result = [
  28. (path='/moo/foo/baz', weight=10),
  29. (path='/foo/baz', weight=10),
  30. ]
  31. """
  32. regex_needle = '.*' + '.*'.join(imap(re.escape, needles)) + '.*'
  33. regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
  34. found = lambda haystack: re.search(
  35. regex_needle,
  36. haystack.path,
  37. flags=regex_flags,
  38. )
  39. return ifilter(found, haystack)
  40. def match_consecutive(needles, haystack, ignore_case=False):
  41. """
  42. Matches consecutive needles at the end of a path.
  43. For example:
  44. needles = ['foo', 'baz']
  45. haystack = [
  46. (path='/foo/bar/baz', weight=10),
  47. (path='/foo/baz/moo', weight=10),
  48. (path='/moo/foo/baz', weight=10),
  49. (path='/foo/baz', weight=10),
  50. ]
  51. # We can't actually use re.compile because of re.UNICODE
  52. regex_needle = re.compile(r'''
  53. foo # needle #1
  54. [^/]* # all characters except os.sep zero or more times
  55. / # os.sep
  56. [^/]* # all characters except os.sep zero or more times
  57. baz # needle #2
  58. [^/]* # all characters except os.sep zero or more times
  59. $ # end of string
  60. ''')
  61. result = [
  62. (path='/moo/foo/baz', weight=10),
  63. (path='/foo/baz', weight=10),
  64. ]
  65. """
  66. regex_no_sep = '[^' + os.sep + ']*'
  67. regex_no_sep_end = regex_no_sep + '$'
  68. regex_one_sep = regex_no_sep + os.sep + regex_no_sep
  69. regex_needle = regex_one_sep.join(imap(re.escape, needles)) + regex_no_sep_end
  70. regex_flags = re.IGNORECASE | re.UNICODE if ignore_case else re.UNICODE
  71. found = lambda entry: re.search(
  72. regex_needle,
  73. entry.path,
  74. flags=regex_flags,
  75. )
  76. return ifilter(found, haystack)
  77. def match_fuzzy(needles, haystack, ignore_case=False, threshold=0.6):
  78. """
  79. Performs an approximate match with the last needle against the end of
  80. every path past an acceptable threshold.
  81. For example:
  82. needles = ['foo', 'bar']
  83. haystack = [
  84. (path='/foo/bar/baz', weight=11),
  85. (path='/foo/baz/moo', weight=10),
  86. (path='/moo/foo/baz', weight=10),
  87. (path='/foo/baz', weight=10),
  88. (path='/foo/bar', weight=10),
  89. ]
  90. result = [
  91. (path='/foo/bar/baz', weight=11),
  92. (path='/moo/foo/baz', weight=10),
  93. (path='/foo/baz', weight=10),
  94. (path='/foo/bar', weight=10),
  95. ]
  96. This is a weak heuristic and used as a last resort to find matches.
  97. """
  98. end_dir = lambda path: last(os.path.split(path))
  99. if ignore_case:
  100. needle = last(needles).lower()
  101. match_percent = lambda entry: SequenceMatcher(
  102. a=needle,
  103. b=end_dir(entry.path.lower()),
  104. ).ratio()
  105. else:
  106. needle = last(needles)
  107. match_percent = lambda entry: SequenceMatcher(
  108. a=needle,
  109. b=end_dir(entry.path),
  110. ).ratio()
  111. meets_threshold = lambda entry: match_percent(entry) >= threshold
  112. return ifilter(meets_threshold, haystack)