module SimpleIDN

IdnaMappingTable-9.0.0.txt. Date: 2016-06-16, 13:35:01 GMT. © 2016 Unicode®, Inc. Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. For terms of use, see www.unicode.org/terms_of_use.html

Unicode IDNA Compatible Preprocessing (UTS #46). For documentation, see www.unicode.org/reports/tr46/. Total code points: 1114112.

Constants

ACE_PREFIX
ASCII_MAX
DOT
EMPTY
LABEL_SEPERATOR_RE
TRANSITIONAL

See UTS46 Table 1

UTS64MAPPING

Define a basic uppercase to lowercase mapping for ASCII a..z

VERSION

Public Instance Methods

to_ascii(domain, transitional = false) click to toggle source

Converts a Unicode string (for example, UTF-8) to a Punycode ACE string.

Example

SimpleIDN.to_ascii("møllerriis.com")
 => "xn--mllerriis-l8a.com"
# File lib/simpleidn.rb, line 233
# Converts a Unicode domain name to its ASCII-compatible (ACE) form.
#
# The domain is transcoded to UTF-8, passed through +uts46map+, and split
# into labels on LABEL_SEPERATOR_RE. Every label containing a code point
# above ASCII_MAX is replaced by ACE_PREFIX followed by its Punycode
# encoding; all other labels are passed through untouched.
#
# domain::       the domain name to convert; +nil+ yields +nil+.
# transitional:: forwarded unchanged to +uts46map+.
#
# Returns the converted domain, encoded in the same encoding as +domain+.
def to_ascii(domain, transitional = false)
  return nil if domain.nil?

  mapped_domain = uts46map(domain.encode(Encoding::UTF_8), transitional)

  # A negative limit keeps trailing empty labels, so a trailing dot survives.
  # Any failure while splitting is treated as "no labels at all".
  labels =
    begin
      mapped_domain.split(LABEL_SEPERATOR_RE, -1)
    rescue StandardError
      []
    end

  # Leading empty labels are discarded; empty labels after the first
  # non-empty one are kept.
  encoded = labels.drop_while(&:empty?).map do |label|
    if label.codepoints.any? { |cp| cp > ASCII_MAX }
      ACE_PREFIX + Punycode.encode(label)
    else
      label
    end
  end

  # Input that consisted solely of separators collapses to a single dot.
  encoded = [DOT] if encoded.empty? && !mapped_domain.empty?

  encoded.join(DOT).encode(domain.encoding)
end
to_unicode(domain, transitional = false) click to toggle source

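Converts a Punycode ACE string to a Unicode string. The result is returned in the input's encoding when it can be represented there; otherwise it is returned as UTF-8.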

Example

SimpleIDN.to_unicode("xn--mllerriis-l8a.com")
 => "møllerriis.com"
# File lib/simpleidn.rb, line 257
# Converts an ASCII-compatible (ACE) domain name back to Unicode.
#
# The domain is transcoded to UTF-8, passed through +uts46map+, and split
# into labels on LABEL_SEPERATOR_RE. Every label starting with ACE_PREFIX
# has the prefix stripped and the remainder Punycode-decoded; all other
# labels are passed through untouched.
#
# domain::       the domain name to convert; +nil+ yields +nil+.
# transitional:: forwarded unchanged to +uts46map+.
#
# Returns the decoded domain. It is converted to the encoding of +domain+
# when that conversion succeeds; otherwise the unconverted result is
# returned.
def to_unicode(domain, transitional = false)
  return nil if domain.nil?

  mapped_domain = uts46map(domain.encode(Encoding::UTF_8), transitional)

  # A negative limit keeps trailing empty labels, so a trailing dot survives.
  # Any failure while splitting is treated as "no labels at all".
  labels =
    begin
      mapped_domain.split(LABEL_SEPERATOR_RE, -1)
    rescue StandardError
      []
    end

  # Leading empty labels are discarded; empty labels after the first
  # non-empty one are kept.
  decoded = labels.drop_while(&:empty?).map do |label|
    next label unless label.start_with?(ACE_PREFIX)

    Punycode.decode(label[ACE_PREFIX.length..-1])
  end

  # Input that consisted solely of separators collapses to a single dot.
  decoded = [DOT] if decoded.empty? && !mapped_domain.empty?

  result = decoded.join(DOT)

  # Best effort: an ACE input is usually in an ASCII-only encoding that
  # cannot hold the decoded characters, so a failed conversion is not an
  # error and the result is returned as it is.
  begin
    result.encode!(domain.encoding)
  rescue Encoding::UndefinedConversionError
  end

  result
end
uts46map(str, transitional = false) click to toggle source

Applies UTS46 mapping to a Unicode string. Returns a UTF-8 string in Normalization Form C (NFC).

# File lib/simpleidn.rb, line 222
# Applies the UTS46 mapping tables to +str+ and normalizes the result.
#
# Each code point of +str+ is looked up in UTS64MAPPING; code points with
# no entry are kept as they are. When +transitional+ is truthy, each
# resulting entry is additionally looked up in TRANSITIONAL in the same
# way. Table entries may be arrays of code points, so the list is flattened
# before being turned back into characters.
#
# str::          the string whose code points are mapped.
# transitional:: when truthy, also apply the TRANSITIONAL table.
#
# Returns the mapped string after calling +to_nfc+ on it.
def uts46map(str, transitional = false)
  entries = str.codepoints.map do |code_point|
    UTS64MAPPING.fetch(code_point, code_point)
  end

  if transitional
    entries = entries.map { |entry| TRANSITIONAL.fetch(entry, entry) }
  end

  characters = entries.flatten.map do |code_point|
    code_point.chr(Encoding::UTF_8)
  end

  characters.join(EMPTY).to_nfc
end