forked from diaspora/diaspora
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdirection_detector.rb
55 lines (51 loc) · 1.56 KB
/
direction_detector.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# coding: utf-8
# Copyright (c) 2010-2011, Diaspora Inc. This file is
# licensed under the Affero General Public License version 3 or later. See
# the COPYRIGHT file.
# Deeply inspired by https://gitorious.org/statusnet/mainline/blobs/master/plugins/DirectionDetector/DirectionDetectorPlugin.php
class String
  # Inclusive [first, last] Unicode code point ranges whose characters are
  # treated as right-to-left by #starts_with_rtl_char?.
  RTL_RANGES = [
    [1536, 1791],   # arabic, persian, urdu, kurdish, ...
    [1872, 1919],   # arabic supplement (U+0750..U+077F)
    [65136, 65279], # arabic presentation forms B
    [64336, 65023], # arabic presentation forms A
    [1424, 1535],   # hebrew
    # hebrew presentation forms start at U+FB1D; U+FB00..U+FB1C holds Latin
    # and Armenian ligatures, which must not be classified as RTL.
    [64285, 64335],
    [1792, 1871],   # syriac
    [1920, 1983],   # thaana
    [1984, 2047],   # nko
    [11568, 11647]  # tifinagh
  ].map { |range| range.freeze }.freeze

  # Patterns removed by #cleaned_is_rtl? before the direction is guessed.
  RTL_CLEANER_REGEXES = [
    /@[^ ]+|#[^ ]+/u,                # mention, tag
    /^RT[: ]{1}| RT | RT: |[♺♻:]/u # retweet
  ].freeze

  # Guesses whether the text should be rendered right-to-left.
  #
  # Every whitespace-separated word votes: +1 when it starts with an RTL
  # character, -1 otherwise. A positive balance means more than half of the
  # words are RTL. On a tie or an LTR majority the first character of the
  # whole text decides.
  #
  # Returns true or false; blank strings are never RTL.
  def is_rtl?
    return false if strip.empty?

    balance = split(" ").inject(0) do |sum, word|
      starts_with_rtl_char?(word) ? sum + 1 : sum - 1
    end
    return true if balance > 0 # more than half of the words are rtl words

    starts_with_rtl_char?(self) # otherwise let the first word decide
  end

  # Diaspora specific: same as #is_rtl?, but evaluated on a copy of the text
  # with everything matching RTL_CLEANER_REGEXES removed (mentions, tags and
  # retweet markers). The receiver is not modified.
  def cleaned_is_rtl?
    # Work on a plain String copy so the receiver stays untouched.
    cleaned = RTL_CLEANER_REGEXES.inject(String.new(self)) do |text, cleaner|
      text.gsub(cleaner, '')
    end
    cleaned.is_rtl?
  end

  # Tells whether the first non-whitespace character of +string+ (the
  # receiver by default) falls inside one of the RTL_RANGES.
  #
  # Returns false for empty or whitespace-only input.
  def starts_with_rtl_char?(string = self)
    stripped = string.strip
    return false if stripped.empty?

    # Decode only the first code point instead of unpacking the whole string.
    code_point = stripped.unpack('U').first
    RTL_RANGES.any? { |first, last| code_point >= first && code_point <= last }
  end
end