-- Warning: this is an htmlized version!
-- The original is here, and the conversion rules are here.
-- This file:
-- http://anggtwu.net/blogme3/detect-encoding.lua.html
-- http://anggtwu.net/blogme3/detect-encoding.lua
-- (find-angg "blogme3/detect-encoding.lua")
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
--
-- (defun l () (interactive) (find-angg "blogme3/detect-encoding.lua"))
-- «.bad-conversions» (to "bad-conversions")
-- «.re» (to "re")
-- DetectEncoding: loads a file and tries to decide which Emacs coding
-- system name ("utf-8-unix" or "raw-text-unix") applies to it, first by
-- looking for an explicit mention near the top/bottom of the file and
-- then by inspecting its bytes.
-- Relies on helpers defined outside this file: Class, ee_readfile,
-- splitlines, max and measure_utf8_ness.
DetectEncoding = Class {
  type = "DetectEncoding",

  -- DetectEncoding.new(fname, root)
  -- Reads the file whose name is (root or "~/")..fname and returns an
  -- object with these fields:
  --   bigstr: the whole contents, as returned by ee_readfile
  --   lines:  the result of splitlines(bigstr)
  --   nlines: #lines
  -- NOTE(review): presumably ee_readfile expands the leading "~" --
  -- confirm in lua50init.lua.
  new = function (fname, root)
      local path     = (root or "~/")..fname
      local contents = ee_readfile(path)
      local linelist = splitlines(contents)
      return DetectEncoding {
        lines  = linelist,
        nlines = #linelist,
        bigstr = contents,
      }
    end,

  __index = {
    -- Joins with "\n" the lines from index max(nlines - n, 1) up to the
    -- last one. When the file is long enough this range holds n+1 lines.
    lastlines0 = function (self, n)
        local last  = self.nlines
        local first = max(last - n, 1)
        return table.concat(self.lines, "\n", first, last)
      end,

    -- Like lastlines0 (n defaults to 7), but when the result contains a
    -- newline everything up to and including its last form feed ("\f")
    -- is dropped. Without a form feed the text comes back unchanged.
    lastlines = function (self, n)
        local tail = self:lastlines0(n or 7)
        if not tail:match("\n") then return tail end
        -- gsub also returns a match count; keep only the string.
        local afterformfeed = tail:gsub("^(.*\f)([^\f]*)$", "%2")
        return afterformfeed
      end,

    -- Returns the first line, a newline, and the result of lastlines(n).
    -- Files with fewer than two lines are returned whole.
    firstandlastlines = function (self, n)
        if self.nlines >= 2 then
          return self.lines[1].."\n"..self:lastlines(n)
        end
        return table.concat(self.lines, "\n")
      end,

    -- Searches firstandlastlines(n) for a coding system name.
    -- The patterns are tried in order and the first hit wins; note that
    -- "no-conversion" is reported as "raw-text-unix".
    -- Returns nothing when none of the names appears.
    explicitencoding = function (self, n)
        local text = self:firstandlastlines(n)
        local probes = {
          {"utf%-8%-unix",    "utf-8-unix"},
          {"raw%-text%-unix", "raw-text-unix"},
          {"no%-conversion",  "raw-text-unix"},
        }
        for _, probe in ipairs(probes) do
          if text:match(probe[1]) then return probe[2] end
        end
      end,

    -- Guesses the encoding from the two counts returned by
    -- measure_utf8_ness(bigstr), which are also stored in self.nu8 and
    -- self.nother.
    -- NOTE(review): presumably nu8 counts UTF-8 sequences and nother
    -- counts other non-ASCII bytes -- confirm in lua50init.lua.
    --   both zero             -> "any"
    --   nu8 > 0, nother == 0  -> "utf-8-unix"
    --   nu8 == 0, nother > 0  -> "raw-text-unix"
    --   both positive         -> returns nothing
    guessencoding0 = function (self)
        local nu8, nother = measure_utf8_ness(self.bigstr)
        self.nu8, self.nother = nu8, nother
        if nother == 0 then
          if nu8 == 0 then return "any" end
          if nu8 > 0 then return "utf-8-unix" end
        elseif nu8 == 0 and nother > 0 then
          return "raw-text-unix"
        end
      end,

    -- Prefers an explicit encoding and falls back to the guess.
    -- The result (possibly nil) is stored in self.enc and returned.
    guessencoding = function (self, n)
        local enc = self:explicitencoding(n)
        if not enc then enc = self:guessencoding0() end
        self.enc = enc
        return self.enc
      end,
  },
}
--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "detect-encoding.lua"
de = DetectEncoding.new("TODO")
= de:lastlines0(7)
= de:lastlines(7)
= de:lastlines()
= de:firstandlastlines()
= de:explicitencoding()
str = de:firstandlastlines()
= str
= str:match("utf")
= str:match("utf-8")
= DetectEncoding.new("TODO"):guessencoding()
= "\f"
--]]
--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "detect-encoding.lua"
QUIET = 1
dofile "anggmake.lua"
PPV(anggtranslate) -- (find-blogme3 "anggmake.lua" "anggtranslate")
PPV(escriptstems) -- (find-blogme3 "anggmake.lua" "escriptstems")
filelist = map(function (s) return "e/"..s..".e" end, escriptstems)
filelist = anggtranslate
PPV(filelist)
for _,fname in ipairs(filelist) do
local de = DetectEncoding.new(fname)
local enc = de:explicitencoding()
-- print(enc, fname)
-- if not enc then print(fname) end
if not enc then
local nu8, nother = measure_utf8_ness(de.bigstr)
if nu8 > 0 or nother > 0 then
-- print(nu8, nother, fname)
-- if nu8 > 0 and nother > 0 then print(nu8, nother, fname) end
if nu8 > 0 and nother == 0 then print(nu8, nother, fname) end
end
end
end
fa = function (fname) return format('(find-angg "%s")', fname) end
filelist = map(function (s) return "e/"..s..".e" end, escriptstems)
for _,fname in ipairs(filelist) do
local de = DetectEncoding.new(fname)
local enc = de:explicitencoding()
if enc == "raw-text-unix" then print(fa(fname)) end
end
-- (find-angg "LUA/lua50init.lua" "u8c_to_l1")
-- (find-angg "LUA/lua50init.lua" "u8c_to_l1" "measure_utf8_ness")
-- «bad-conversions» (to ".bad-conversions")
-- Look for bad conversions (like the char c2s here???):
-- (hexl-find-file "~/e/page.e~")
-- (hexl-find-file "~/e/page.e")
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
h = function (s) return tonumber(s, 16) end
h = function (s) return format("%c", tonumber(s, 16)) end
= h"40"
fa = function (fname) return format('(find-fline "%s")', fname) end
QUIET = 1
dofile "anggmake.lua"
-- filelist = map(function (s) return "e/"..s..".e" end, escriptstems)
for _,stem in ipairs(escriptstems) do
fname = "~/e/"..stem..".e"
bigstr = ee_readfile(fname)
if bigstr:match(h"c2") then
fname2 = "/tmp/"..stem.."-new.e"
print(fname2)
bigstr2 = bigstr:gsub(h"c2", "")
ee_writefile(fname2, bigstr2)
print(fa(fname))
print(fa(fname2))
end
end
end
--]]
-- «re» (to ".re")
-- (find-es "lua5" "lpeg-quickref")
require "re"
--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "detect-encoding.lua"
--]]