text_region.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589
  1. import hashlib
  2. import subprocess
  3. from urllib.parse import urlparse
  4. import unicodedata
  5. from html.parser import HTMLParser
  6. import math
  7. import os
  8. import cairo
  9. import toaru_fonts
  10. import yutani
  def create_from_bmp(path):
      """Load the bitmap at *path* and return it as a cairo surface.

      The file is read with ``yutani.Sprite.from_file`` and converted with the
      sprite's ``get_cairo_surface()``. ``None`` is returned when *path* does
      not exist or its name does not end in ``.bmp``.
      """
      is_bitmap = os.path.exists(path) and path.endswith('.bmp')
      if not is_bitmap:
          return None
      sprite = yutani.Sprite.from_file(path)
      return sprite.get_cairo_surface()
  # Emoji support. The emoji directory holds one PNG per code point, named
  # after the code point's hexadecimal value; names containing '-' are ignored.
  _EMOJI_DIR = '/usr/share/emoji'
  _emoji_available = os.path.exists(_EMOJI_DIR)


  def _load_emoji_values(directory):
      """Return the set of code points that have a PNG in *directory*."""
      values = set()
      for name in os.listdir(directory):
          if not name.endswith('.png') or '-' in name:
              continue
          try:
              values.add(int(name.replace('.png', ''), 16))
          except ValueError:
              # A PNG whose name is not a hexadecimal number is not an emoji
              # image; skip it rather than failing the import of this module.
              continue
      return values


  # A set, because membership is tested for every character that is classified.
  # Always defined (empty when the directory is missing) so that users of the
  # name never hit a NameError.
  _emoji_values = _load_emoji_values(_EMOJI_DIR) if _emoji_available else set()

  # Surfaces that have already been loaded, keyed by the emoji character.
  _emoji_table = {}


  def get_emoji(emoji):
      """Return the cairo image surface for the single character *emoji*.

      The image is read from ``/usr/share/emoji/<hex code point>.png`` on first
      use and cached in ``_emoji_table`` afterwards.
      """
      if emoji not in _emoji_table:
          path = f'{_EMOJI_DIR}/{ord(emoji):x}.png'
          _emoji_table[emoji] = cairo.ImageSurface.create_from_png(path)
      return _emoji_table[emoji]
  class TextUnit(object):
      """One indivisible piece of laid-out content: a run of text or an image.

      ``unit_type`` values used in this file: 0 ordinary text, 1 a space,
      2 a character that lines may be broken around, 3 a line feed,
      4 a slice of an inline image.
      """

      def __init__(self, string, unit_type, font):
          """Store the unit and measure it with ``font.width`` (0 without a font).

          A type-2 unit whose character has an emoji image is turned into an
          image unit: its string is emptied and its width becomes the font size.
          """
          self.string = string
          self.unit_type = unit_type
          self.font = font
          self.width = font.width(self.string) if font else 0
          self.extra = {}
          self.tag_group = None

          if not (self.unit_type == 2 and _emoji_available):
              return
          if not (ord(self.string) > 0x1000 and ord(self.string) in _emoji_values):
              return
          self.extra['emoji'] = True
          self.extra['img'] = get_emoji(self.string)
          self.extra['offset'] = font.font_size
          self.string = ""
          self.width = font.font_size

      def set_tag_group(self, tag_group):
          """Remember the shared *tag_group* list and add this unit to it."""
          self.tag_group = tag_group
          self.tag_group.append(self)

      def set_font(self, font):
          """Switch to *font* and re-measure; units carrying an image are left alone."""
          if 'img' in self.extra:
              return
          self.font = font
          if font:
              self.width = font.width(self.string)
          else:
              self.width = 0

      def set_extra(self, key, data):
          """Attach arbitrary *data* to this unit under *key*."""
          self.extra[key] = data

      def __repr__(self):
          return f"({self.string},{self.unit_type},{self.width})"
  49. class TextRegion(object):
  50. def __init__(self, x, y, width, height, font=None):
  51. self.x = x
  52. self.y = y
  53. self.width = width
  54. self.height = height
  55. if not font:
  56. font = toaru_fonts.Font(toaru_fonts.FONT_SANS_SERIF, 13)
  57. self.font = font
  58. self.text = ""
  59. self.lines = []
  60. self.align = 0
  61. self.valign = 0
  62. self.line_height = self.font.font_size
  63. self.text_units = []
  64. self.scroll = 0
  65. self.ellipsis = ""
  66. self.one_line = False
  67. self.base_dir = ""
  68. self.break_all = False
  69. self.title = None
  70. self.max_lines = None
  71. def set_alignment(self, align):
  72. self.align = align
  73. def set_valignment(self, align):
  74. self.valign = align
  75. def set_max_lines(self, max_lines):
  76. self.max_lines = max_lines
  77. self.reflow()
  78. def visible_lines(self):
  79. return int(self.height / self.line_height)
  80. def reflow(self):
  # Rebuild self.lines (a list of lines, each a list of TextUnit) from
  # self.text_units so that the units on a line fit within self.width.
  # NOTE(review): indentation was lost in this copy of the file, so the
  # nesting described in the comments below is an assumption - confirm
  # against the properly indented source before changing any logic.
  81. self.lines = []
  82. current_width = 0
  83. current_units = []
  # Remainder of a unit that was split across two lines; when set it is
  # processed instead of self.text_units[i] on the next iteration.
  84. leftover = None
  85. i = 0
  86. while i < len(self.text_units):
  87. if leftover:
  88. unit = leftover
  89. leftover = None
  90. else:
  91. unit = self.text_units[i]
  # Type 3 (line feed): close the current line, even if it is empty.
  92. if unit.unit_type == 3:
  93. self.lines.append(current_units)
  94. current_units = []
  95. current_width = 0
  96. i += 1
  97. continue
  # Type 4 (image slice): flush pending units, then put the slice on a line
  # of its own.
  98. if unit.unit_type == 4:
  99. if current_units:
  100. self.lines.append(current_units)
  # NOTE(review): i is advanced here and again a few lines below. Whatever the
  # nesting, that appears to skip the unit following the image in at least
  # one case - verify whether this increment is intended.
  101. i += 1
  102. self.lines.append([unit])
  103. current_units = []
  104. current_width = 0
  105. i += 1
  106. continue
  # The unit does not fit on what is left of the current line.
  107. if current_width + unit.width > self.width:
  # Split the unit itself when the line is empty, or when this is the last
  # line allowed (one_line, or max_lines about to be reached).
  108. if not current_units or self.one_line or (self.max_lines is not None and len(self.lines) == self.max_lines - 1):
  109. # We need to split the current unit.
  # Find the longest prefix (k characters) that fits together with the
  # ellipsis string.
  110. k = len(unit.string)-1
  111. while k and current_width + unit.font.width(unit.string[:k] + self.ellipsis) > self.width:
  112. k -= 1
  113. ellipsis = self.ellipsis
  114. if not k and self.ellipsis:
  115. ellipsis = ""
  # Nothing of this unit fits on the final line: shorten earlier units on the
  # line until the ellipsis fits, or add the ellipsis as a unit of its own.
  116. if not k and (self.one_line or (self.max_lines is not None and len(self.lines) == self.max_lines - 1)):
  117. added_ellipsis = False
  # Note: the comprehension variable in sum([...]) is also called `unit`, but
  # comprehensions have their own scope, so the outer `unit` is unaffected.
  118. while len(current_units) and sum([unit.width for unit in current_units]) + unit.font.width(self.ellipsis) > self.width:
  119. this_unit = current_units[-1]
  120. current_units = current_units[:-1]
  121. current_width = sum([unit.width for unit in current_units])
  122. k = len(this_unit.string)-1
  123. while k and current_width + unit.font.width(this_unit.string[:k] + self.ellipsis) > self.width:
  124. k -= 1
  125. if k:
  126. current_units.append(TextUnit(this_unit.string[:k] + self.ellipsis,this_unit.unit_type,this_unit.font))
  127. added_ellipsis = True
  128. break
  129. if not added_ellipsis:
  130. current_units.append(TextUnit(self.ellipsis,0,unit.font))
  131. else:
  # Keep the first k characters (plus ellipsis) on this line and carry the
  # rest over as `leftover`; i is not advanced on this path.
  132. current_units.append(TextUnit(unit.string[:k]+ellipsis,unit.unit_type,unit.font))
  133. leftover = TextUnit(unit.string[k:],unit.unit_type,unit.font)
  134. self.lines.append(current_units)
  135. current_units = []
  136. current_width = 0
  # Stop laying out once the single line / the last permitted line is filled.
  137. if self.one_line or (self.max_lines is not None and len(self.lines) == self.max_lines):
  138. return
  139. else:
  # Ordinary wrap: close the line and retry the same unit on a fresh one.
  140. self.lines.append(current_units)
  141. current_units = []
  142. current_width = 0
  # Presumably: a space (type 1) that caused the wrap is dropped rather than
  # starting the next line - confirm the nesting of this check.
  143. if unit.unit_type == 1:
  144. i += 1
  145. else:
  # The unit fits: add it to the current line and move on.
  146. current_units.append(unit)
  147. current_width += unit.width
  148. i += 1
  # Emit the final, partially filled line.
  149. if current_units:
  150. self.lines.append(current_units)
  def units_from_text(self, text, font=None, whitespace=True):
      """Split *text* into a list of ``TextUnit`` objects measured with *font*.

      *font* defaults to the region's font. Runs of ordinary characters become
      one type-0 unit; every other character becomes a unit of its own (see
      ``classify`` below). When *whitespace* is false, every whitespace
      character is replaced by a single-space type-1 unit.
      """
      if not font:
          font = self.font

      def char_width(char):
          # Emoji that have an image and East Asian "Wide" characters take two
          # cells; everything else takes one.
          if _emoji_available and ord(char) in _emoji_values:
              return 2
          x = unicodedata.east_asian_width(char)
          if x == 'W':
              return 2
          if x in ('Na', 'N', 'A', 'F', 'H'):
              # Fullwidth ('F') is deliberately treated as normal width.
              return 1
          print(f"Don't know how wide {x} is, assuming 1")
          return 1

      def classify(char):
          if char == '\n':
              return 3  # break on line feed
          if unicodedata.category(char) == 'Zs':
              return 1  # break on space
          # Allow a break on wide (CJK) characters, or on every character when
          # break_all is set.
          # TODO: only really valid for Chinese and Japanese; Korean doesn't
          # work this way.
          if char_width(char) > 1 or self.break_all:
              return 2
          return 0

      units = []
      pending = []

      def flush():
          # Emit the ordinary characters collected so far as one type-0 unit.
          if pending:
              units.append(TextUnit("".join(pending), 0, font))
              pending.clear()

      for c in text:
          if not whitespace and c.isspace():
              flush()
              units.append(TextUnit(' ', 1, font))
              continue
          kind = classify(c)
          if kind == 0:
              pending.append(c)
          else:
              flush()
              units.append(TextUnit(c, kind, font))
      flush()
      return units
  198. def set_one_line(self, one_line=True):
  199. self.one_line = one_line
  200. self.reflow()
  201. def set_ellipsis(self, ellipsis="…"):
  202. self.ellipsis = ellipsis
  203. self.reflow()
  204. def set_text(self, text):
  205. self.text = text
  206. self.text_units = self.units_from_text(text)
  207. self.reflow()
  208. def set_richtext(self, text, html=False):
  # Parse *text* as markup with an HTMLParser subclass, turn it into TextUnit
  # objects (with fonts, links and images attached), store them in
  # self.text_units and reflow. With html=True the input is treated as HTML:
  # whitespace is collapsed and relative link/image targets are resolved
  # against self.base_dir.
  # NOTE(review): indentation was lost in this copy of the file; comments that
  # depend on nesting are marked as assumptions.
  209. f = self.font
  210. self.text = text
  # `tr` gives the nested parser class access to this TextRegion.
  211. tr = self
  212. class RichTextParser(HTMLParser):
  213. def __init__(self, html=False):
  214. super(RichTextParser,self).__init__()
  # Fonts and link targets in effect outside the currently open tags.
  215. self.font_stack = []
  216. self.tag_stack = []
  217. self.current_font = f
  218. self.units = []
  219. self.link_stack = []
  220. self.current_link = None
  # List shared by all units of the link currently being parsed (None outside
  # a link).
  221. self.tag_group = None
  222. self.is_html = html
  223. self.whitespace_sensitive = not html
  # Tags that never get a closing tag and so are not pushed on tag_stack.
  224. self.autoclose = ['br','meta','input']
  225. self.title = ''
  226. if self.is_html:
  227. self.autoclose.extend(['img','link'])
  # Loaded image surfaces, keyed by path (see img_from_path).
  228. self.surface_cache = {}
  229. def handle_starttag(self, tag, attrs):
  # The helpers below map a font number to its bold / italic / monospace
  # variant; numbers that already have the attribute are returned unchanged.
  # Presumably bits 1, 2 and 4 of the font number - verify against toaru_fonts.
  230. def make_bold(n):
  231. if n == 0: return 1
  232. if n == 2: return 3
  233. if n == 4: return 5
  234. if n == 6: return 7
  235. return n
  236. def make_italic(n):
  237. if n == 0: return 2
  238. if n == 1: return 3
  239. if n == 4: return 6
  240. if n == 5: return 7
  241. return n
  242. def make_monospace(n):
  243. if n == 0: return 4
  244. if n == 1: return 5
  245. if n == 2: return 6
  246. if n == 3: return 7
  247. return n
  248. if tag not in self.autoclose:
  249. self.tag_stack.append(tag)
  # In HTML mode a block-level tag starts on a new line unless the previous
  # unit already is a line feed.
  250. if tag in ['p','div','h1','h2','h3','li','tr','pre'] and not self.whitespace_sensitive: # etc?
  251. if self.units and self.units[-1].unit_type != 3:
  252. self.units.append(TextUnit('\n',3,self.current_font))
  # Each styling tag saves the current font on font_stack and installs a
  # modified copy; handle_endtag restores it.
  253. if tag == "b":
  254. self.font_stack.append(self.current_font)
  255. self.current_font = toaru_fonts.Font(make_bold(self.current_font.font_number),self.current_font.font_size,self.current_font.font_color)
  256. elif tag == "i":
  257. self.font_stack.append(self.current_font)
  258. self.current_font = toaru_fonts.Font(make_italic(self.current_font.font_number),self.current_font.font_size,self.current_font.font_color)
  259. elif tag == "color":
  260. self.font_stack.append(self.current_font)
  # The colour is parsed as hex from the *name* of the first attribute
  # (attrs[0][0]), with the top byte forced to 0xFF. A <color> tag without
  # attributes raises IndexError here.
  261. self.current_font = toaru_fonts.Font(self.current_font.font_number,self.current_font.font_size,int(attrs[0][0],16) | 0xFF000000)
  262. elif tag == "mono":
  263. self.font_stack.append(self.current_font)
  264. self.current_font = toaru_fonts.Font(make_monospace(self.current_font.font_number),self.current_font.font_size,self.current_font.font_color)
  265. elif tag == "pre":
  266. self.font_stack.append(self.current_font)
  267. self.current_font = toaru_fonts.Font(make_monospace(self.current_font.font_number),self.current_font.font_size,self.current_font.font_color)
  # <link target="..."> is the non-HTML link tag.
  268. elif tag == "link" and not self.is_html:
  269. target = None
  270. for attr in attrs:
  271. if attr[0] == "target":
  272. target = attr[1]
  273. self.tag_group = []
  274. self.link_stack.append(self.current_link)
  275. self.current_link = target
  276. self.font_stack.append(self.current_font)
  277. self.current_font = toaru_fonts.Font(self.current_font.font_number,self.current_font.font_size,0xFF0000FF)
  278. elif tag == "a":
  279. target = None
  280. for attr in attrs:
  281. if attr[0] == "href":
  282. target = attr[1]
  283. self.tag_group = []
  284. self.link_stack.append(self.current_link)
  # In HTML mode, targets without an http:/https: prefix are resolved against
  # tr.base_dir: host-relative ("/...") ones against its scheme and host, all
  # others by plain string concatenation.
  285. if target and self.is_html and not target.startswith('http:') and not target.startswith('https:'):
  286. # This is actually more complicated than this check - protocol-relative stuff can work without full URLs
  287. if target.startswith('/'):
  288. base = urlparse(tr.base_dir)
  289. target = f"{base.scheme}://{base.netloc}{target}"
  290. else:
  291. target = tr.base_dir + target
  292. self.current_link = target
  293. self.font_stack.append(self.current_font)
  294. self.current_font = toaru_fonts.Font(self.current_font.font_number,self.current_font.font_size,0xFF0000FF)
  # Headings use a bold font of fixed size; no colour argument is passed, so
  # the current font colour is not carried over.
  295. elif tag == "h1":
  296. self.font_stack.append(self.current_font)
  297. self.current_font = toaru_fonts.Font(make_bold(self.current_font.font_number),20)
  298. elif tag == "h2":
  299. self.font_stack.append(self.current_font)
  300. self.current_font = toaru_fonts.Font(make_bold(self.current_font.font_number),18)
  301. elif tag == "h3":
  302. self.font_stack.append(self.current_font)
  303. self.current_font = toaru_fonts.Font(make_bold(self.current_font.font_number),16)
  304. elif tag == "img":
  305. self.handle_img(tag,attrs)
  306. elif tag == "br":
  307. units = tr.units_from_text('\n', self.current_font)
  308. self.units.extend(units)
  309. else:
  310. pass
  # Self-closing tags (<img/>, <br/>, ...): nothing is pushed on tag_stack.
  311. def handle_startendtag(self, tag, attrs):
  312. if tag == "img":
  313. self.handle_img(tag,attrs)
  314. elif tag == "br":
  315. units = tr.units_from_text('\n', self.current_font)
  316. self.units.extend(units)
  317. elif tag in ['p','div','h1','h2','h3','tr','pre'] and not self.whitespace_sensitive: # etc?
  318. units = tr.units_from_text('\n', self.current_font)
  319. self.units.extend(units)
  320. else:
  321. # Unknown start/end tag.
  322. pass
  323. def handle_endtag(self, tag):
  # NOTE(review): when tag_stack is empty only a message is printed; the next
  # statement still evaluates self.tag_stack[-1], which raises IndexError.
  324. if not self.tag_stack:
  325. print(f"No stack when trying to close {tag}?")
  326. if self.tag_stack[-1] != tag:
  327. print(f"unclosed tag {self.tag_stack[-1]} when closing tag {tag}")
  328. else:
  329. self.tag_stack.pop()
  # Restore the font saved by the matching start tag.
  # NOTE(review): font_stack.pop() / link_stack.pop() raise IndexError for a
  # closing tag that was never opened. Whether these statements run for a
  # mismatched tag depends on nesting that is not recoverable here.
  330. if tag in ["b","i","color","mono","link","h1","h2","h3","a","pre"]:
  331. self.current_font = self.font_stack.pop()
  # In HTML mode a block-level element ends with a line feed.
  332. if tag in ['p','div','h1','h2','h3','li','tr','pre'] and not self.whitespace_sensitive: # etc?
  333. units = tr.units_from_text('\n', self.current_font)
  334. self.units.extend(units)
  335. if tag in ["link","a"]:
  336. self.current_link = self.link_stack.pop()
  337. self.tag_group = None
  338. def handle_data(self, data):
  # Text inside <title> is collected; text inside <head> or <script> is never
  # rendered.
  339. if 'title' in self.tag_stack:
  340. self.title += data
  341. if 'head' in self.tag_stack or 'script' in self.tag_stack: return
  # Inside <pre> whitespace is always preserved.
  342. if 'pre' in self.tag_stack:
  343. units = tr.units_from_text(data, self.current_font, whitespace=True)
  344. else:
  345. units = tr.units_from_text(data, self.current_font, whitespace=self.whitespace_sensitive)
  # Tag every unit with the active link target and its shared tag group.
  346. if self.current_link:
  347. for u in units:
  348. u.set_extra('link',self.current_link)
  349. if self.tag_group is not None:
  350. for u in units:
  351. u.set_tag_group(self.tag_group)
  352. self.units.extend(units)
  353. def handle_img(self, tag, attrs):
  # Resolve the image's src, download it to a cache file when it is an http:
  # URL, and add it as type-4 units, one per line_height of image height.
  354. target = None
  355. for attr in attrs:
  356. if attr[0] == "src":
  357. target = attr[1]
  358. if target and self.is_html and not target.startswith('http:') and not target.startswith('https:'):
  359. # This is actually more complicated than this check - protocol-relative stuff can work without full URLs
  360. if target.startswith('/'):
  361. base = urlparse(tr.base_dir)
  362. target = f"{base.scheme}://{base.netloc}{target}"
  363. else:
  364. target = tr.base_dir + target
  365. else:
  366. if target and not self.is_html and not target.startswith('/'):
  367. target = tr.base_dir + target
  # NOTE(review): this second check tests only for 'http:'. In HTML mode an
  # 'https:' target therefore gets tr.base_dir prepended. If this statement is
  # not nested under the `else` above, a target already resolved above is
  # prefixed a second time when base_dir is not an http: URL. Confirm intent.
  368. if target and self.is_html and not target.startswith('http:'):
  369. target = tr.base_dir + target
  # Remote images are fetched once with the external `fetch` command into
  # /tmp, under a name derived from the SHA-512 of the URL. Only 'http:' URLs
  # are fetched here; 'https:' is not.
  370. if target and target.startswith('http:'):
  371. x = hashlib.sha512(target.encode('utf-8')).hexdigest()
  372. p = f'/tmp/.browser-cache.{x}'
  373. if not os.path.exists(p):
  374. try:
  375. subprocess.check_output(['fetch','-o',p,target])
  # NOTE(review): bare except - also swallows KeyboardInterrupt/SystemExit.
  376. except:
  377. print(f"Failed to download image: {target}")
  378. pass
  379. target = p
  380. if target and os.path.exists(target):
  381. try:
  382. img = self.img_from_path(target)
  383. except:
  384. print(f"Failed to load image {target}, going to show backup image.")
  385. img = None
  # img_from_path returns None for anything that is not an existing .bmp file,
  # in which case the "missing" icon is shown instead.
  # NOTE(review): if the fallback icon cannot be loaded either, img is None
  # and img.get_height() below raises AttributeError.
  386. if not img:
  387. img = self.img_from_path('/usr/share/icons/16/missing.bmp')
  # Number of line-high slices needed to show the whole image.
  388. chop = math.ceil(img.get_height() / tr.line_height)
  389. group = []
  390. for i in range(chop):
  391. u = TextUnit("",4,self.current_font)
  392. u.set_extra('img',img)
  # Vertical offset of this slice within the image.
  393. u.set_extra('offset',i * tr.line_height)
  394. if self.current_link:
  395. u.set_extra('link',self.current_link)
  396. u.set_tag_group(group)
  397. u.width = img.get_width()
  398. self.units.append(u)
  399. def fix_whitespace(self):
  # Collapse whitespace: a single-space unit that directly follows another
  # space or a line feed is dropped.
  400. out_units = []
  401. last_was_whitespace = False
  402. for unit in self.units:
  403. if unit.unit_type == 3:
  404. last_was_whitespace = True
  405. out_units.append(unit)
  406. elif unit.unit_type == 1 and unit.string == ' ':
  407. if last_was_whitespace:
  408. continue
  409. last_was_whitespace = True
  410. out_units.append(unit)
  411. else:
  412. last_was_whitespace = False
  413. out_units.append(unit)
  414. self.units = out_units
  415. def img_from_path(self, path):
  # Load an image through create_from_bmp, caching the result per path (a None
  # result is cached too).
  416. if not path in self.surface_cache:
  417. s = create_from_bmp(path)
  418. self.surface_cache[path] = s
  419. return s
  420. else:
  421. return self.surface_cache[path]
  # Run the parser and take over its results.
  422. parser = RichTextParser(html=html)
  423. parser.feed(text)
  424. self.title = parser.title
  425. if html:
  426. parser.fix_whitespace()
  427. self.text_units = parser.units
  428. self.reflow()
  429. def set_font(self, new_font):
  430. self.font = new_font
  431. self.line_height = self.font.font_size
  432. self.reflow()
  433. def set_line_height(self, new_line_height):
  434. self.line_height = new_line_height
  435. self.reflow()
  436. def resize(self, new_width, new_height):
  437. needs_reflow = self.width != new_width
  438. self.width = new_width
  439. self.height = new_height
  440. if needs_reflow:
  441. self.reflow()
  442. def move(self, new_x, new_y):
  443. self.x = new_x
  444. self.y = new_y
  445. def get_offset_at_index(self, index):
  446. """ Only works for one-liners... """
  447. if not self.lines:
  448. return None, (0, 0, 0, 0)
  449. left_align = 0
  450. xline = self.lines[0]
  451. if self.align == 1: # right align
  452. left_align = self.width - sum([u.width for u in xline])
  453. elif self.align == 2: # center
  454. left_align = int((self.width - sum([u.width for u in xline])) / 2)
  455. i = 0
  456. for unit in xline:
  457. if i == index:
  458. return unit, (0, left_align, left_align, i)
  459. left_align += unit.width
  460. i += 1
  461. return None, (0, left_align, left_align, i)
  462. def pick(self, x, y):
  463. # Determine which line this click belongs in
  464. if x < self.x or x > self.x + self.width or y < self.y or y > self.y + self.height:
  465. return None, None
  466. top_align = 0
  467. if len(self.lines) < int(self.height / self.line_height):
  468. if self.valign == 1: # bottom
  469. top_align = self.height - len(self.lines) * self.line_height
  470. elif self.valign == 2: # middle
  471. top_align = int((self.height - len(self.lines) * self.line_height) / 2)
  472. new_y = y - top_align - self.y - 2 # fuzz factor
  473. line = int(new_y / self.line_height)
  474. if line < len(self.lines[self.scroll:]):
  475. left_align = 0
  476. xline = self.lines[self.scroll+line]
  477. if self.align == 1: # right align
  478. left_align = self.width - sum([u.width for u in xline])
  479. elif self.align == 2: # center
  480. left_align = int((self.width - sum([u.width for u in xline])) / 2)
  481. i = 0
  482. for unit in xline:
  483. if x >= self.x + left_align and x < self.x + left_align + unit.width:
  484. return unit, (line, left_align, x - self.x, i)
  485. left_align += unit.width
  486. i += 1
  487. return None, (line, left_align, x - self.x, i)
  488. return None, None
  489. def click(self, x, y):
  490. unit, _ = self.pick(x,y)
  491. return unit
  492. def draw(self, context):
  # Draw the lines from self.scroll onwards onto *context*. Text units are
  # written with unit.font.write(context, ...); image slices and emoji are
  # painted through a cairo context created from context.get_cairo_surface().
  # NOTE(review): indentation was lost in this copy of the file, and this is
  # the last definition visible here - the method may continue beyond the
  # lines shown.
  # current_height is the y offset passed to font.write for the line being
  # drawn; it starts one line_height below the top of the region.
  493. current_height = self.line_height
  # Vertical alignment only applies when the text does not fill the region.
  494. top_align = 0
  495. if len(self.lines) < int(self.height / self.line_height):
  496. if self.valign == 1: # bottom
  497. top_align = self.height - len(self.lines) * self.line_height
  498. elif self.valign == 2: # middle
  499. top_align = int((self.height - len(self.lines) * self.line_height) / 2)
  # NOTE(review): when *context* has no get_cairo_surface, `cr` is None and the
  # image/emoji branches below fail on cr.save(); only text can be drawn then.
  500. su = context.get_cairo_surface() if 'get_cairo_surface' in dir(context) else None
  501. cr = cairo.Context(su) if su else None
  502. for line in self.lines[self.scroll:]:
  # Stop at the first line that would extend past the bottom of the region.
  503. if current_height > self.height:
  504. break
  505. left_align = 0
  506. if self.align == 1: # right align
  507. left_align = self.width - sum([u.width for u in line])
  508. elif self.align == 2: # center
  509. left_align = int((self.width - sum([u.width for u in line])) / 2)
  510. for unit in line:
  # Type 4: one line-high slice of an image.
  511. if unit.unit_type == 4:
  512. cr.save()
  # `extra` shifts the slice down by 3 pixels relative to the line's offset.
  513. extra = 3
  514. cr.translate(self.x + left_align, self.y + current_height + top_align)
  # A highlighted slice gets a red rectangle painted underneath it.
  515. if 'hilight' in unit.extra and unit.extra['hilight']:
  516. cr.rectangle(0,-self.line_height+extra,unit.extra['img'].get_width(),self.line_height)
  517. cr.set_source_rgb(1,0,0)
  518. cr.fill()
  # Paint the image through a line-high rectangle; the surface is shifted up
  # by the slice's 'offset' so that the right part of it shows.
  519. cr.rectangle(0,-self.line_height+extra,unit.extra['img'].get_width(),self.line_height)
  520. cr.set_source_surface(unit.extra['img'],0,-unit.extra['offset']-self.line_height+extra)
  521. cr.fill()
  522. cr.restore()
  # Emoji: drawn as an image, scaled down when taller than line_height - 3.
  523. elif unit.unit_type == 2 and 'emoji' in unit.extra:
  524. cr.save()
  525. extra = 3
  526. cr.translate(self.x + left_align, self.y + current_height + top_align -self.line_height+extra)
  527. if unit.extra['img'].get_height() > self.line_height - 3:
  528. scale = (self.line_height - 3) / unit.extra['img'].get_height()
  529. cr.scale(scale,scale)
  530. cr.rectangle(0,0,unit.extra['img'].get_width(),unit.extra['img'].get_height())
  531. cr.set_source_surface(unit.extra['img'],0,0)
  532. cr.fill()
  533. cr.restore()
  # Everything else is text; units without a font are skipped.
  534. elif unit.font:
  535. unit.font.write(context, self.x + left_align, self.y + current_height + top_align, unit.string)
  # Advance the pen past this unit, then (after the line) down by one line.
  536. left_align += unit.width
  537. current_height += self.line_height