Technopedia Center
PMB University Brochure
Faculty of Engineering and Computer Science
S1 Informatics S1 Information Systems S1 Information Technology S1 Computer Engineering S1 Electrical Engineering S1 Civil Engineering

Faculty of Economics and Business
S1 Management S1 Accountancy

Faculty of Letters and Educational Sciences
S1 English Literature S1 English Language Education S1 Mathematics Education S1 Sports Education
teknopedia

teknopedia

teknopedia

teknopedia

teknopedia

teknopedia
teknopedia
teknopedia
teknopedia
teknopedia
teknopedia
  • Registrasi
  • Brosur UTI
  • Kip Scholarship Information
  • Performance
  1. Wiktionary
  2. Module:ceb-badlit sc
Module:ceb-badlit sc
From Wiktionary, the free dictionary

The following documentation is located at Module:ceb-badlit sc/documentation. [edit]
Useful links: subpage list • links • transclusions • testcases • sandbox

Powers {{ceb-bad sc}}, which generates Badlit forms of Cebuano words.

-- Based on [[Module:tl-bay sc]] by [[User:Ysrael214]], in turn based on [[Module:tl-pron]]. See respective modules for attribution.

local export = {}

local lang = require("Module:languages").getByCode("ceb")
local sc_Tglg = require("Module:scripts").getByCode("Tglg")

local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rsplit = mw.text.split
local ulower = mw.ustring.lower

-- Combining diacritics, built from their code points. They are used below to
-- assemble pattern fragments and inside export.transcribe(), which works on
-- NFD-decomposed text.
local AC = u(0x0301) -- combining acute accent
local GR = u(0x0300) -- combining grave accent
local CFLEX = u(0x0302) -- combining circumflex
local TILDE = u(0x0303) -- combining tilde
local DIA = u(0x0308) -- combining diaeresis
local MACRON = u(0x0304) -- combining macron

-- Pattern building blocks. Names ending in _c (and V / C) are complete
-- bracketed character classes; the others are bare character lists meant to
-- be spliced into a class.
local vowel = "aeəiouàèìòù" -- characters treated as vowels
local V = "[" .. vowel .. "]" -- class: any single vowel
local accent = AC .. GR .. CFLEX .. MACRON -- accent marks (tilde and diaeresis are deliberately not in this list)
local accent_c = "[" .. accent .. "]" -- class: any single accent mark
local stress_c = "[" .. AC .. GR .. "]" -- class: acute or grave only
local separator = accent ..  "# ./" -- accents plus the boundary characters '#', ' ', '.' and '/'
local separator_c = "[" .. separator .. "]" -- class: any separator (not referenced in the code shown here)
local C = "[^" .. vowel .. separator .. "]" -- consonant: anything that is neither a vowel nor a separator

-- Base letters of the target script, keyed by the Latin/IPA letter used in the
-- intermediate transcription. Note that "d" and "r" map to the same glyph.
local baybayin_chars = { 
	["a"] = "ᜀ", 
	["i"] = "ᜁ", 
	["u"] = "ᜂ",
	["b"] = "ᜊ", 
	["k"] = "ᜃ", 
	["d"] = "ᜇ", 
	["g"] = "ᜄ", 
	["h"] = "ᜑ", 
	["l"] = "ᜎ",
	["m"] = "ᜋ",
	["n"] = "ᜈ",
	["ŋ"] = "ᜅ",
	["p"] = "ᜉ",
	["r"] = "ᜇ",
	["s"] = "ᜐ",
	["t"] = "ᜆ",
	["w"] = "ᜏ",
	["y"] = "ᜌ"
}

-- Marks appended after a base consonant letter. "a" maps to the empty string
-- (no mark is added); "i" and "u" are the vowel signs. The "+" entry is what
-- export.transcribe() appends after a consonant that is not followed by a
-- vowel. The "/" entry is not referenced by the code visible in this module.
local baybayin_marks = {
	["a"] = "",
	["i"] = "ᜒ",
	["u"] = "ᜓ",
	["+"] = "᜔",
	["/"] = "᜕"
}

-- Whole-word respellings applied by export.transcribe() before any other
-- processing; the lookup is done on the word with its accent mark removed.
local baybayin_replace_word = {
	["mga"] = "manga"
}

-- Single-result wrapper around rsubn(): performs the substitution and hands
-- back only the resulting string, dropping the replacement count.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate rsubn()'s results to the first value.
	return (rsubn(term, foo, bar))
end

-- Wrapper around rsubn() whose second result is a boolean: true when at
-- least one substitution was performed, false otherwise.
local function rsubb(term, foo, bar)
	local replaced, count = rsubn(term, foo, bar)
	local changed = count > 0
	return replaced, changed
end

-- Keep applying rsub() with the same pattern and replacement until a pass
-- leaves the string unchanged, then return that fixed point.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end

-- ĵ and ɟ are used internally to represent [d͡ʒ] and [j]

-- Convert Latin-script text into the script whose letters are listed in
-- baybayin_chars / baybayin_marks.
--
-- The text is lower-cased, NFD-normalised, respelled through a series of
-- orthographic and phonemic rewrites, split into syllables with ".", and then
-- each syllable is mapped to glyphs. "#" is used internally as a word boundary
-- marker and is removed before returning.
--
-- Parameters:
--   text  string to transcribe; when nil/false, the page name from
--         Module:headword/data is used instead.
--   trad  when truthy, selects the alternative ("traditional") output:
--         a single "/" becomes the same divider as "//", consonants that close
--         a syllable are not written, "r" is not respelled as "d", and the
--         consonants of a cluster carry a vowel mark instead of the "+" mark.
--   diph  when falsy, a vowel followed by i / u is rewritten with j / w
--         (i.e. treated as a diphthong) before glyph conversion.
--
-- Returns the transcribed string, NFC-normalised.
function export.transcribe(text, trad, diph)
	text = ulower(text or mw.loadData("Module:headword/data").pagename)
	-- decompose everything but ñ and ü
	text = mw.ustring.toNFD(text)
	-- Recompose the sequences that must stay single characters. Sequences that
	-- have no entry in the table (e.g. g + tilde) are left as they are.
	text = rsub(text, "." .. "[" .. TILDE .. DIA .."]", {
		["n" .. TILDE] = "ñ",
		["u" .. DIA] = "ü",
		["e" .. DIA] = "ə",
	})

	-- convert commas and en/em dashes to IPA foot boundaries
	text = rsub(text, "%s*[,–—]%s*", " | ")
	-- question mark or exclamation point in the middle of a sentence -> IPA foot boundary
	text = rsub(text, "([^%s])%s*[!?]%s*([^%s])", "%1 | %2")

	-- canonicalize multiple spaces and remove leading and trailing spaces
	local function canon_spaces(text)
		text = rsub(text, "%s+", " ")
		text = rsub(text, "^ ", "")
		text = rsub(text, " $", "")
		return text
	end

	text = canon_spaces(text)

	local words = rsplit(text, " ")

	for i, word in ipairs(words) do
		-- Look the word up with every accent mark stripped
		local replacement = baybayin_replace_word[rsub(word, accent_c, "")]
		if replacement then
			words[i] = replacement
		end
	end

	text = table.concat(words, " ")

	-- Convert slashes to bantasan, kulit divider
	text = rsub(text, "//", " ᜶ ")
	text = rsub(text, "/", trad and ' ᜶ ' or " ᜵ ")

	-- Convert hyphens to dot
	text = rsub(text, "%-", ".")
	-- canonicalize multiple spaces again, which may have been introduced by hyphens
	text = canon_spaces(text)
	-- now eliminate punctuation
	text = rsub(text, "[!?']", "")
	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"

	-- Move this early for now
	--c, gü/gu+e or i, q
	text = rsub(text, "c([ie])", "s%1")
	text = rsub(text, "([aeëiou])gü([ie])", "%1ɡw%2")
	text = rsub(text, "gü([ie])", "ɡuw%1")
	text = rsub(text, "gu([ie])", "ɡ%1")
	text = rsub(text, "qu([ie])", "k%1")
	text = rsub(text, "ü", "u")

	--ll
	text = rsub(text, "ll([i]?)([aeëiou])", "ly%2")

	-- Correction for vowels with in-between glottal stop, now default
	text = rsub_repeatedly(text, "(" .. V .. ")(" .. V .. ")", "%1.%2")

	-- Reenable "j" sound be equivalent to "dy"
	-- Ex. gaja = ga(r)ya not gariya
	text = rsub(text, "dj(".. V .. ")"  , "dy%1")
	text = rsub(text, "j(" .. V .. ")", "dy%1")

	-- handle certain combinations; ch ng and sh handling needs to go first
	text = rsub(text, "([t]?)ch", "ts")
	text = rsub(text, "([n]?)g̃", "ŋ") -- Spanish spelling support
	text = rsub(text, "ng", "ŋ")
	text = rsub(text, "sh", "sy")

	--ck
	text = rsub(text, "ck", "k") -- foreign sound in case

	--x
	text = rsub(text, "([#])x([aeëiou])", "%1s%2")
	text = rsub(text, "x", "ks")

	--alphabet-to-phoneme
	-- Characters in the class that have no table entry (r, 7) are left unchanged.
	text = rsub(text, "[cgjñqrvz7]",
	--["g"]="ɡ":  U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
		{ ["c"] = "k", ["g"] = "ɡ", ["j"] = "ĵ", ["ñ"] = "nj", ["q"] = "k", ["v"] = "b", ["z"] = "s"})

	--r
	text = rsub(text, "rr", "r")

	--determining whether "y" is a consonant or a vowel
	--Badlitan treats as consonant regardless
	text = rsub(text, "y(" .. V .. ")", "ɟ%1") -- not the real sound
	text = rsub(text,"y([ˈˌ.]?)([bćĉdɡhjĵklmnɲŋpɾrsʃtwɟʔ" .. vowel .. "])","i%1%2")
	text = rsub(text, "y#", "i")
	text = rsub(text, "w(" .. V .. ")","w%1")
	text = rsub(text,"w([ˈˌ]?)([bćĉdɡhjĵklmnɲŋpɾrsʃtwɟʔ])","u%1%2")
	text = rsub(text, "w#","u")
	--text = rsub(text, "sɟ", "ʃ")

	-- Syllabification: insert "." between syllables
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*)(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. ")(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .. "*" .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
	-- Any aeo, or stressed iu, should be syllabically divided from a following aeo or stressed iu.
	text = rsub_repeatedly(text, "([aeo]" .. accent_c .. "*)([aeo])", "%1.%2")
	text = rsub_repeatedly(text, "([aeo]" .. accent_c .. "*)(" .. V .. stress_c .. ")", "%1.%2")
	text = rsub(text, "([əiu]" .. stress_c .. ")([aeo])", "%1.%2")
	text = rsub_repeatedly(text, "([əiu]" .. stress_c .. ")(" .. V .. stress_c .. ")", "%1.%2")
	text = rsub_repeatedly(text, "i(" .. accent_c .. "*)i", "i%1.i")
	text = rsub_repeatedly(text, "u(" .. accent_c .. "*)[ou]", "u%1.u")

	-- Remove accent marks (all of them: a leftover accent is neither V nor C
	-- and would stop the syllable patterns below from matching)
	text = rsub(text, accent_c, "")

	if (not diph) then
		--Corrections for diphthongs
		text = rsub(text,"([aeəou])i","%1j") --y
		text = rsub(text,"([aeəio])u","%1w") --w
	end

	-- After processing pronunciation, Badlit Start Translate
	-- Collapse to the three vowels that have glyphs: a, i, u
	text = rsub(text, "[əei]", "i")
	text = rsub(text, "[ou]", "u")

	-- Remove /kt/ like "abstrakt"
	text = rsub(text, "kt([#.])", "k%1")

	-- Check if there are errors with vowels again
	text = rsub(text,"([aiu])([^.]?)([aəiu])","%1.%2%3")

	-- Convert one syllable to glyphs.
	--   syll        onset consonants plus vowel(s)
	--   post        consonants following the vowel (only written when not trad)
	--   last_vowel  vowel of the previous syllable when that syllable ended in
	--               m or n (else nil); used in trad mode as the mark on the
	--               first consonant of a cluster
	local function baybay_syllable(syll, post, last_vowel)
		-- Glyphs for the trailing consonants; stays empty in trad mode
		local syll2 = ""

		-- Single-byte stand-ins for multi-byte letters, so that strings can be
		-- walked one byte at a time with gmatch('.') below
		local bay_double = {
			["ĵ"] = "d",
			["ɡ"] = "g", ["ŋ"] = "N",
		}

		-- Map one intermediate letter to its base glyph
		local function baybay(character)
			local bay_soundpre = ''
			character = rsub(character, "[ɡ]", "g")

			-- NOTE(review): baybayin_chars has no "ĵ" entry, so if this branch
			-- is ever taken the final concatenation below looks like it would
			-- fail on nil; confirm whether "ĵ" can actually reach this point.
			if character == 'ĵ' then
				bay_soundpre = bay_double[character]
				bay_soundpre = baybayin_chars[bay_soundpre] .. baybayin_marks[trad and 'i' or '+']
			end

			if not trad then
				character = rsub(character, "[r]", "d")
			end

			character = rsub(character, "[f]", "p")
			character = rsub(character, "[ɟj]", "y")
			character = rsub(character, "[N]", "ŋ")

			return bay_soundpre .. baybayin_chars[character]
		end

		if not trad then
			-- Remove /h/ as it is not pronounced in between
			syll = rsub(syll, "([^h]+)(h+)", "%1")
			post = rsub(post, "(h+)", "")

			post = rsub(post, "ŋ", bay_double["ŋ"])
			post = rsub(post, "ɡ", bay_double["ɡ"])
			post = rsub(post, "ĵ", bay_double["ĵ"] .. 's')

			-- Each trailing consonant is written followed by the "+" mark
			for c in post:gmatch('.') do
				syll2 = syll2 .. baybay(c) .. baybayin_marks['+']
			end
		end
		syll = rsub(syll, "(" .. C .. "*)(" .. V .. "+)",
			function(consonant, vowel)
				local bay_char = ''

				-- string.len counts bytes: ĵ, ŋ and ɡ are two bytes each, hence
				-- the "length 2" test for a single such consonant
				if string.len(consonant) == 0 then
					bay_char = baybay(vowel)
				elseif string.len(consonant) == 1 or string.match(consonant, "[ĵŋɡ]") and string.len(consonant) == 2 then
					bay_char = baybay(consonant) .. baybayin_marks[vowel]
				elseif string.match(consonant, "^(.*)ll$") then
					for c in consonant:gmatch('^(.)ll$') do
						bay_char = bay_char .. baybay(c) .. baybayin_marks[trad and vowel or '+']
					end

					bay_char = bay_char .. baybay("l") .. baybayin_marks[trad and "i" or '+']
					bay_char = bay_char .. baybay("y") .. baybayin_marks[vowel]
				else
					-- Two character unicode problems
					consonant = rsub(consonant, "ŋ", bay_double["ŋ"])
					consonant = rsub(consonant, "ɡ", bay_double["ɡ"])
					consonant = rsub(consonant, "ɟ", "y")

					for c in consonant:gmatch('.') do
						bay_char = bay_char .. baybay(c) .. baybayin_marks[trad and (last_vowel or vowel) or '+']
						last_vowel = nil
					end

					-- The last consonant of the cluster carries the real vowel
					bay_char = rsub(bay_char, baybayin_marks['+'] .. "$", baybayin_marks[vowel])
				end
				return bay_char
			end
		)

		return syll .. syll2
	end

	words = rsplit(text, " ")
	for i, word in ipairs(words) do

		-- (C)/y/ and --(C)w fixes
		-- /h/ being pronounced like fahm, paham
		if trad then
			word = rsub(word, "([^w" .. vowel .. separator .. "])(w)(" .. V .. ")(" .. C .. "*)([.#]+)", "%1u.%2%3%4%5")
			word = rsub(word, "([^ɟ" .. vowel .. separator .. "])(ɟ)(" .. V .. ")(" .. C .. "*)([.#]+)", "%1i.%2%3%4%5")
			word = rsub(word, "(" .. C .. "*)(" .. V .. ")(h)(" .. C .. "+)([.#]+)", "%1%2.%3%2%4%5")
		end

		local syllables = rsplit(word, "[.]")
		local last_vowel = nil
		for j = 1, #syllables do
			if string.match(syllables[j], V) then
				syllables[j] = rsub(syllables[j], "^([#]*)(" .. C .. "*)(" .. V .. "+)(" .. C .. "*)([#]*)$",
					function(temp1 ,pre, vowel, post, temp2)
						local retval = temp1 .. baybay_syllable(pre .. vowel, post, last_vowel) .. temp2
						-- Remember the vowel for the next syllable only when
						-- this one ends in a nasal (m or n)
						last_vowel = string.match(post, "[mn]") and vowel or nil
						return retval
					end
				)
			elseif not string.match(syllables[j], "[᜵᜶]") then
				-- This is only a fallback when no vowel is entered
				syllables[j] = rsub(syllables[j], "^([#]*)(" .. C .. "+)([#]*)$",
					function(temp1 , consonant , temp2)
						if trad then
							return temp1 .. baybay_syllable(consonant .. "a", "") .. temp2
						else
							return temp1 .. baybay_syllable("", consonant) .. temp2
						end
					end
				)
			end
		end
		words[i] = table.concat(syllables, "")
	end

	text = table.concat(words, " ")

	-- remove # symbols at word and text boundaries
	text = rsub(text, "#", "")

	text = canon_spaces(text)

	return mw.ustring.toNFC(text)
end

-- Template entry point. Reads the arguments of the parent frame, transcribes
-- the text given in parameter 1 (or the page name when it is absent) and
-- returns the result as a string, optionally with a prefix, a wrapping
-- <span>, and a parenthesised romanisation.
--
-- Recognised parameters: 1, trad, diph, disp, pre, tr, r
-- ("r" is accepted but not used by the code below).
--   tr=1  append the transliteration of the output produced by the language
--         object, with the space before "," and "." removed
--   tr=2  append the input text itself, with "." removed, "//" shown as "."
--         and "/" shown as ","
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {},
		["trad"] = {},
		["diph"] = {},
		["disp"] = {},
		["pre"] = {},
		["tr"] = {},
		["r"] = {},
		-- ["bullets"] = {type = "number", default = 1},
	})

	local source = args[1] or mw.loadData("Module:headword/data").pagename
	local output = export.transcribe(source, args.trad or false, args.diph or false)

	-- Baybayin to Latin
	local mode = args["tr"]
	local romanised = ''
	if mode == '1' then
		-- Only the first value returned by transliterate() is used
		local latin = lang:transliterate(output, sc_Tglg)
		latin = rsub(latin, "%s[,]", ",")
		latin = rsub(latin, "%s[.]", ".")
		romanised = ' (' .. latin ..  ')'
	elseif mode == '2' then
		local latin = rsub(source, "[.]", "")
		latin = rsub(latin, "//", ".")
		latin = rsub(latin, "/", ",")
		romanised = ' (' .. latin ..  ')'
	end

	local prefix = ""
	if args.pre then
		prefix = args.pre .. " "
	end

	-- The span is added after the transliteration so that tr=1 works on the
	-- bare transcription
	if args.disp then
		output = '<span class="' .. sc_Tglg:getCode()  ..  '" lang="'  .. lang:getCode()  .. '">' .. output ..  "</span>"
	end

	return prefix .. output .. romanised
end

return export
Retrieved from "https://en.wiktionary.org/w/index.php?title=Module:ceb-badlit_sc&oldid=88556127"
Category:
  • Cebuano modules

  • Indonesia
  • Polski
  • العربية
  • Deutsch
  • English
  • Español
  • Français
  • Italiano
  • مصرى
  • Nederlands
  • 日本語
  • Português
  • Sinugboanong Binisaya
  • Svenska
  • Українська
  • Tiếng Việt
  • Winaray
  • 中文
  • Русский
Sunting pranala
Pusat Layanan

UNIVERSITAS TEKNOKRAT INDONESIA | ASEAN's Best Private University
Jl. ZA. Pagar Alam No.9 -11, Labuhan Ratu, Kec. Kedaton, Kota Bandar Lampung, Lampung 35132
Phone: (0721) 702022
Email: pmb@teknokrat.ac.id