uripecker.sh.skel 8.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. #!/bin/bash
  2. shellfu import pretty
  3. uripecker() {
  4. #
  5. # Scan stdin for what looks like URI, ID or keyword
  6. #
  7. # Usage:
  8. # <SOURCE_TEXT uripecker MAP
  9. #
  10. # Search through SOURCE_TEXT and output in following order:
  11. #
  12. # 1. apparent URIs,
  13. # 2. hash IDs ("bug#1234"),
  14. # 3. equal sign expressions ("bug = 1234"),
  15. # 4. "tags" ("bug1234")
  16. # 5. keyword expressions ("BUG 1234" or "g hello world"),
  17. #
  18. # all (except the first one, obviously) converted to URI using mappings
  19. # from MAP file.
  20. #
  21. # Note that keyword expressions (e.g. "bug 123") work only if they start
  22. # the line; rest of the line is taken as query argument and URL-quoted,
  23. # so that "g what is bmo" would work as expected (given 'g' is defined
  24. # as Google search).
  25. #
  26. # Apply this filter to args or clipboard, and either use head -1 or
  27. # if you are brave, open all URIs.
  28. #
  29. # The MAP file can contain any number of query mappings in format:
  30. #
  31. # NAME = URI_PATTERN
  32. #
  33. # where NAME is any string without spaces, equal sign or dot, and
  34. # URI_PATTERN is a string with precisely one instance of '%s'.
  35. #
  36. # For example, given this MAP file:
  37. #
  38. # issue = http://gitlab.example.org/issue?id=%s
  39. # faq = http://faq.example.org/faq/%s
  40. #
  41. # following text
  42. #
  43. # issue = 1
  44. # faq#225
  45. # issue42
  46. # http://other.example.com/
  47. # faq 14
  48. # or faq 15
  49. #
  50. # would return
  51. #
  52. # http://other.example.com/
  53. # http://faq.example.org/faq/225
  54. # http://gitlab.example.org/issue?id=1
  55. # http://gitlab.example.org/issue?id=42
  56. # http://faq.example.org/faq/14
  57. #
  58. # Note that the URI_PATTERN can be any kind of URI, such as ftp:// URI,
  59. # (or any string, actually) but the '%s' is converted using HTTP URI rules.
  60. #
  61. local MapFile=$1; shift
  62. test -n "$MapFile" || {
  63. warn "usage: uripecker MAP"
  64. return 2
  65. }
  66. local MapBody
  67. local tmp # temporary pipe storage
  68. tmp=$(mktemp -d -t uripecker.XXXXXXXX)
  69. local maybe_uris="$tmp/maybe_uris"
  70. local maybe_ids="$tmp/maybe_ids"
  71. local maybe_exps="$tmp/maybe_exps"
  72. local maybe_tags="$tmp/maybe_tags"
  73. local maybe_kws="$tmp/maybe_kws"
  74. local uris="$tmp/uris"
  75. local uris_from_ids="$tmp/uris_from_ids"
  76. local uris_from_exps="$tmp/uris_from_exps"
  77. local uris_from_tags="$tmp/uris_from_tags"
  78. local uris_from_kws="$tmp/uris_from_kws"
  79. MapBody=$(<"$MapFile") || {
  80. warn "error reading map file: $MapFile"
  81. return 3
  82. }
  83. ##
  84. # heat up and fill pipes
  85. #
  86. mkfifo "$maybe_uris" "$maybe_ids" "$maybe_exps" "$maybe_tags" "$maybe_kws" \
  87. "$uris" "$uris_from_ids" "$uris_from_exps" "$uris_from_tags" "$uris_from_kws"
  88. __uripecker__minimize | tee "$maybe_uris" "$maybe_ids" "$maybe_exps" "$maybe_tags" "$maybe_kws" \
  89. >/dev/null &
  90. ##
  91. # process each pipe *async* by different filter
  92. #
  93. < "$maybe_uris" __uripecker__flt_uris > "$uris" &
  94. < "$maybe_ids" __uripecker__flt_ids | __uripecker__deref > "$uris_from_ids" &
  95. < "$maybe_exps" __uripecker__flt_exps | __uripecker__deref > "$uris_from_exps" &
  96. < "$maybe_tags" __uripecker__flt_tags | __uripecker__deref > "$uris_from_tags" &
  97. < "$maybe_kws" __uripecker__flt_kws | __uripecker__deref > "$uris_from_kws" &
  98. ##
  99. # print result *sync* in correct order
  100. #
  101. {
  102. cat "$uris"
  103. cat "$uris_from_ids"
  104. cat "$uris_from_exps"
  105. cat "$uris_from_tags"
  106. cat "$uris_from_kws"
  107. } | grep . # throw away empties; add missing LF
  108. rm -rf "$tmp"
  109. }
  110. __uripecker__map_lookup() {
  111. #
  112. # Look up query $1 in $Map
  113. #
  114. local key=${1,,}
  115. sed '
  116. s/^ *//
  117. s/ *$//
  118. s/ *= */=/
  119. ' <<<"$MapBody" \
  120. | grep "^$key=" \
  121. | cut -d= -f2-
  122. }
  123. __uripecker__deref() {
  124. #
  125. # Turn query (like "g hello" for google) to URI
  126. #
  127. local kw # keyword part, eg. "g" or "bug"
  128. local query # query part, eg. "hello+dolly" or "1234"
  129. local fmt # for queries: format string (from ini) to pass to printf
  130. while read -r kw query;
  131. do
  132. debug -v kw query
  133. fmt=$(__uripecker__map_lookup "$kw") || return 1
  134. debug -v fmt
  135. #shellcheck disable=SC2059
  136. printf "$fmt\n" "$(__uripecker__urlquote "$query")"
  137. done
  138. }
  139. __uripecker__flt_exps() {
  140. #
  141. # Hack expressions like bug = 123 out of the text
  142. #
  143. sed -e 's/(\d)\</\n/;' \
  144. | perl -CS -ne '
  145. next unless m/\b([a-zA-Z]\w*\s*=\s*[[:alnum:]][[:alnum:]_#-]*)\b/;
  146. print "$1\n";
  147. ' \
  148. | sed -re 's/\s*=\s*/ /'
  149. }
  150. __uripecker__flt_ids() {
  151. #
  152. # Hack doer-like id's (ID#123) out of the text
  153. #
  154. tr ' ' '\n' \
  155. | perl -CS -ne '
  156. next unless m/\b([a-zA-Z]\w*#[[:alnum:]][[:alnum:]_#-]*)\b/;
  157. print "$1\n";
  158. ' \
  159. | tr '#' ' '
  160. }
  161. __uripecker__flt_kws() {
  162. #
  163. # Hack out lines that look like kw expressions (word space text
  164. #
  165. # Eg. 'wiki hello world'
  166. #
  167. grep -Ee '^\s*[a-zA-Z][[:alpha:]_]*\s+[^=]'
  168. }
  169. __uripecker__flt_tags() {
  170. #
  171. # Hack "tags" like bug123 out of the text
  172. #
  173. tr -c '[:alnum:]' '\n' \
  174. | grep -E '^[a-zA-Z]+[0-9]+$' \
  175. | sed -r 's/([a-zA-Z]+)([0-9])/\1 \2/'
  176. }
  177. __uripecker__flt_uris() {
  178. #
  179. # Hack URIs out of the text.
  180. #
  181. # Uses URL parser regex Found as SO answer[1] and adapted to include
  182. # comments from the original Imme's gist[2].
  183. #
  184. # [1]: https://stackoverflow.com/a/30408189/835945
  185. # [2]: https://gist.github.com/imme-emosol/731338/810d83626a6d79f40a251f250ed4625cac0e731f
  186. #
  187. local uri_re # Imme Emosol's URI pattern
  188. local py_code # code to do the matching
  189. uri_re=$(
  190. echo -n '\b'
  191. ## protocol identifier
  192. #
  193. echo -n '(?:(?:https?|ftp)://)'
  194. ## user:pass authentication
  195. #
  196. echo -n '(?:\S+(?::\S*)?@)?'
  197. echo -n '(?:'
  198. ## IP address dotted notation octets
  199. #
  200. echo -n '(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])'
  201. echo -n '(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}'
  202. echo -n '(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))'
  203. echo -n '|'
  204. ## host name
  205. #
  206. echo -n '(?:(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)'
  207. ## domain name
  208. #
  209. echo -n '(?:\.(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)*'
  210. ## TLD identifier
  211. #
  212. echo -n '(?:\.(?:[a-z\u00a1-\uffff]{2,}))'
  213. echo -n '|'
  214. ## "localhost"
  215. #
  216. echo -n '(?:localhost)'
  217. echo -n ')'
  218. ## port number
  219. #
  220. echo -n '(?::\d{2,5})?'
  221. ## resource path
  222. #
  223. echo -n '(?:/[^\s]*)?'
  224. echo -n '\b'
  225. )
  226. py_code=$(
  227. echo 'import os'
  228. echo 'import re'
  229. echo 'import sys'
  230. echo
  231. echo 'for uri in re.findall(os.environ["URI_RE"], sys.stdin.read()):'
  232. echo ' print(uri)'
  233. )
  234. URI_RE=$uri_re $__URIPECKER_PYBIN -c "$py_code"
  235. }
  236. __uripecker__minimize() {
  237. #
  238. # Strip, squash spaces and replace tabs
  239. #
  240. sed '
  241. s/\t/ /g
  242. s/ */ /g
  243. s/^ //
  244. s/ $//
  245. '
  246. }
  247. __uripecker__urlquote() {
  248. #
  249. # URL-quote query $1 and print result
  250. #
  251. local query=$1
  252. debug -v query
  253. #shellcheck disable=SC2028
  254. LC_ALL=en_US.UTF-8 $__URIPECKER_PYBIN -c "$(
  255. echo "import $__URIPECKER_PYMOD"
  256. echo 'import sys'
  257. echo "print($__URIPECKER_PYMOD.quote_plus(sys.argv[1]))"
  258. )" "$query"
  259. #FIXME: There should be proper way w/o touching LC_ALL
  260. }
  261. __shellfu_uripecker__init() {
  262. #
  263. # See what python version is here
  264. #
  265. if python3 --version 2>/dev/null \
  266. | grep -qF 'Python 3.'; then
  267. __URIPECKER_PYBIN=python3
  268. __URIPECKER_PYMOD=urllib.parse
  269. else
  270. __URIPECKER_PYBIN=python
  271. __URIPECKER_PYMOD=urllib
  272. fi
  273. }
  274. #shellfu module-version=__MKIT_PROJ_VERSION__