mirror of
https://salsa.debian.org/debian/debian-reference.git
synced 2026-01-11 20:07:28 +00:00
Improve PO file checker
Signed-off-by: Osamu Aoki <osamu@debian.org>
This commit is contained in:
parent
335a23012b
commit
68800a35ea
3 changed files with 176 additions and 152 deletions
4
Makefile
4
Makefile
|
|
@ -834,6 +834,10 @@ check:
|
|||
for XX in $(foreach LX, $(LANGPO), $(DPO)/$(LX).po); do \
|
||||
$(MSGCAT) --no-wrap $$XX | grep -e '^msgstr "http.*%' || true ;\
|
||||
done
|
||||
# check for XML sanity in PO%
|
||||
for XX in $(foreach LX, $(LANGPO), $(DPO)/$(LX).po); do \
|
||||
$(DBIN)/xmlpo $$XX || true ;\
|
||||
done
|
||||
|
||||
stat:
|
||||
@$(call check-command, msgcat, gettext)
|
||||
|
|
|
|||
|
|
@ -164,6 +164,8 @@ Let's assume you are updating "fr":
|
|||
$ make wrap LANGPO=fr; make po LANGPO=fr # check fuzzy.log
|
||||
... hack-hack-hack
|
||||
$ make wrap LANGPO=fr; make po LANGPO=fr # check fuzzy.log
|
||||
$ make check LANGPO=fr
|
||||
... check sanity of PO file (optional)
|
||||
$ make test LANGPO=fr
|
||||
... check build/ for good HTML build
|
||||
$ git add po/fr.po
|
||||
|
|
@ -172,6 +174,10 @@ Let's assume you are updating "fr":
|
|||
$ gitk --all # check no remote updates
|
||||
$ git push origin latest
|
||||
```
|
||||
Here, `make check` is an optional step. It helps you find the location of an
|
||||
error in the PO file more easily than tracing errors in the test HTML build. If
|
||||
you intentionally add supplemental contents with extra XML tags, you can add a
|
||||
translator comment "`skip-tag-match`" for each such PO entry.
|
||||
|
||||
If merging is more complicated, you can create a local topic branch and work.
|
||||
You can use such a topic branch as a remote backup, too.
|
||||
|
|
|
|||
318
bin/xmlpo
318
bin/xmlpo
|
|
@ -18,188 +18,202 @@ along with this program; if not, write to the Free Software
|
|||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
||||
USA
|
||||
"""
|
||||
import argparse
|
||||
import locale
|
||||
import collections
|
||||
import sys
|
||||
import argparse
|
||||
import collections
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
# You must install python3-polib package
|
||||
import polib
|
||||
|
||||
|
||||
#######################################################################
|
||||
# Global variables
|
||||
# PO data XML analyzer class
|
||||
#######################################################################
|
||||
verbose = 0 # quiet
|
||||
# verbose = 1: default
|
||||
# verbose = 2: verbose
|
||||
# verbose = 3: debug
|
||||
class XMLPOEntry:
    """Analyze the XML markup carried by the msgid and msgstr of one PO entry.

    Both strings are wrapped in a ``<msg>`` element and parsed with
    ElementTree.  For each of them the instance records an error excerpt
    (``xml_msgid_err`` / ``xml_msgstr_err``, ``None`` when the text parses)
    and a ``collections.Counter`` of the element tags found
    (``xml_msgid_tags`` / ``xml_msgstr_tags``, empty when parsing failed).

    Keyword arguments (all optional; unknown keywords are ignored):
        linenum: line number reported in messages (default 0).
        tcomment: translator comment of the entry (default "").
        msgid, msgstr: the raw PO strings (default "").
        skip_fuzzy, skip_obsolete: stored on the instance only; this class
            does not act on them (default True).
        mask_entity: replace "&" by "_" before parsing (default True).
        mask_namespace: replace "xl:href=" by "xl_href=" before parsing
            (default True).
        check_skip_tag_match: honour a "skip-tag-match" translator comment
            in is_unmatched_xml() (default True).
    """

    def __init__(self, **kwargs):
        self.linenum = kwargs.get("linenum", 0)
        self.tcomment = kwargs.get("tcomment", "")
        self.msgid = kwargs.get("msgid", "")
        self.msgstr = kwargs.get("msgstr", "")
        self.skip_fuzzy = kwargs.get("skip_fuzzy", True)
        self.skip_obsolete = kwargs.get("skip_obsolete", True)
        self.mask_entity = kwargs.get("mask_entity", True)
        self.mask_namespace = kwargs.get("mask_namespace", True)
        self.check_skip_tag_match = kwargs.get("check_skip_tag_match", True)
        # Masked copies handed to the XML parser.  Both substitutions keep
        # the string length unchanged, so an offset found in the masked text
        # is also valid in the raw text.
        self.xmsgid = self._mask(self.msgid)
        self.xmsgstr = self._mask(self.msgstr)
        # msgid XML analysis
        self.xml_msgid_err, self.xml_msgid_tags = self._analyze(
            self.msgid, self.xmsgid
        )
        # msgstr XML analysis
        self.xml_msgstr_err, self.xml_msgstr_tags = self._analyze(
            self.msgstr, self.xmsgstr
        )

    def _mask(self, text):
        """Return text with entities and the xl: namespace masked as configured."""
        if self.mask_entity:
            text = text.replace("&", "_")
        if self.mask_namespace:
            text = text.replace("xl:href=", "xl_href=")
        return text

    @staticmethod
    def _error_offset(text, position, prefix="<msg>"):
        """Convert a parser (line, column) position into an index into text.

        position is ``ParseError.position`` obtained while parsing
        ``prefix + text + ...``; the line is 1-based and the column is
        0-based within that line.  The column alone is only an offset into
        text when the error is on the first line, so the start of the
        reported line is located first.  Only "\\n" is treated as a line
        break here.  The result is clamped to ``0..len(text)``.
        """
        line, column = position
        wrapped = prefix + text
        line_start = 0
        for _ in range(line - 1):
            newline = wrapped.find("\n", line_start)
            if newline < 0:
                break
            line_start = newline + 1
        return min(max(line_start + column - len(prefix), 0), len(text))

    def _analyze(self, raw, masked):
        """Parse masked as XML and return ``(error, tags)``.

        error is None on success, otherwise a printable description: for a
        ParseError it is the escaped part of raw preceding the error followed
        by an error marker.  tags counts the element tags found, excluding
        the wrapping "msg" element; it is empty when parsing failed.
        """
        error = None
        # Fallback tree used when the real text cannot be parsed.
        root = ET.fromstring("<msg></msg>")
        try:
            root = ET.fromstring("<msg>" + masked + "</msg>")
        except ET.ParseError as err:
            # Show the text up to the error position.
            col = self._error_offset(masked, err.position)
            error = polib.escape(raw[:col]) + '"<<< ERROR'
        except Exception as err:
            error = "{} error: {}".format(type(err), err)
        tags = collections.Counter(element.tag for element in root.iter())
        del tags["msg"]
        return error, tags

    def is_unmatched_xml(self):
        """Return True when msgid and msgstr are valid XML with different tags.

        Returns False for untranslated entries, for entries where either
        string failed to parse (those are reported by print_error), and for
        entries whose translator comment contains "skip-tag-match" while
        check_skip_tag_match is enabled.
        """
        if self.msgstr == "":
            # ignore not-yet-translated data
            return False
        if self.xml_msgid_err is not None:
            # ignore non-valid XML in msgid
            return False
        if self.xml_msgstr_err is not None:
            # ignore non-valid XML in msgstr
            return False
        if "skip-tag-match" in self.tcomment and self.check_skip_tag_match:
            # ignore data with "skip-tag-match" in its translator comment
            return False
        return self.xml_msgid_tags != self.xml_msgstr_tags

    def print_error(self):
        """Print the XML parse errors of this entry, if any, to stdout."""
        if self.xml_msgid_err is not None:
            print("E: msgid XML error at {}".format(self.linenum))
            print(" {}".format(self.xml_msgid_err))
        if self.xml_msgstr_err is not None:
            print("E: msgstr XML error at {}".format(self.linenum))
            print(" {}".format(self.xml_msgstr_err))
        if self.xml_msgid_err is not None or self.xml_msgstr_err is not None:
            print(' msgid "{}"'.format(polib.escape(self.msgid)))
            print(' msgstr "{}"'.format(polib.escape(self.msgstr)))
            print()

    def print_unmatched_tags(self):
        """Print a warning to stdout when is_unmatched_xml() is True."""
        # unmatched tags
        if self.is_unmatched_xml() is True:
            print("W: unmatched XML tag at {}".format(self.linenum))
            print(" msgid_tags = {}".format(self.xml_msgid_tags))
            print(" msgstr_tags = {}".format(self.xml_msgstr_tags))
            print(' msgid "{}"'.format(polib.escape(self.msgid)))
            print(' msgstr "{}"'.format(polib.escape(self.msgstr)))
            print()
        return
|
||||
|
||||
|
||||
#######################################################################
|
||||
# main: parse command line parser
|
||||
#######################################################################
|
||||
def main():
|
||||
locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="""\
|
||||
xml tag checker for po-file
|
||||
|
||||
When PO file is generated from DocBook XML or similar file, it will contain
|
||||
some XML markers. Many translation errors come from typos around such markers.
|
||||
|
||||
This checker will find unmatched set of XML markers between msgid a msgstr.
|
||||
|
||||
Return 0, if no error. Return count of errors, if the error is found.
|
||||
analyzer for po-file
|
||||
|
||||
copyright 2024 Osamu Aoki <osamu@debian.org>
|
||||
license: MIT
|
||||
|
||||
"""
|
||||
)
|
||||
parser.add_argument("-v", "--verbose", action="count", default=1, help="verbose")
|
||||
parser.add_argument(
|
||||
"-m",
|
||||
"--msg",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="print msgid and msgstr for each error",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-i",
|
||||
"--ignore-entity",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="ignore entity by replacing & with _",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-f",
|
||||
"--test-fuzzy",
|
||||
"--include-fuzzy",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="test applies to fuzzy msg too",
|
||||
help="force to include fuzzy PO entries",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-o",
|
||||
"--include-obsolete",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="force to include obsolete PO entries",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-t",
|
||||
"--ignore-skip-tag-match",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="force to ignore skip-tag-match in translator comment",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-e",
|
||||
"--expose-entity",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="force to disable masking of '&' by '_' (expose entity)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-n",
|
||||
"--expose-namespace",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="force to disable masking of 'xl:href=' by 'xl_href=' (expose namespace)",
|
||||
)
|
||||
parser.add_argument("pofile", help="po file to be analyzed")
|
||||
#######################################################################
|
||||
# generate argument parser instance
|
||||
#######################################################################
|
||||
args = parser.parse_args()
|
||||
# verbose = args.verbose
|
||||
#######################################################################
|
||||
state = "" # "msgid"/"msgstr"/""
|
||||
state_last = ""
|
||||
msgid_str = ""
|
||||
msgstr_str = ""
|
||||
fuzzy = False
|
||||
msgstr_lnum = 0
|
||||
print("I: Process pofile='{}'".format(args.pofile))
|
||||
print()
|
||||
try:
|
||||
po = polib.pofile(args.pofile)
|
||||
except Exception as err:
|
||||
print("{} error: {} for PO file='{}'".format(type(err), err, args.pofile))
|
||||
sys.exit(1)
|
||||
error_count = 0
|
||||
with open(args.pofile, "r") as fp:
|
||||
for lnum, line in enumerate(fp.readlines()):
|
||||
line = line.strip() # remove NL
|
||||
if line.startswith("msgid"):
|
||||
state = "msgid"
|
||||
msgid_str = line[len("msgid ") :].strip()[1:-1]
|
||||
elif line.startswith("msgstr"):
|
||||
state = "msgstr"
|
||||
msgstr_lnum = lnum
|
||||
msgstr_str = line[len("msgstr ") :].strip()[1:-1]
|
||||
elif line.startswith('"'):
|
||||
if state == "msgid":
|
||||
msgid_str += line[1:-1]
|
||||
elif state == "msgstr":
|
||||
msgstr_str += line[1:-1]
|
||||
else:
|
||||
# line number should start at 1 like editor
|
||||
print("E: **INVALID** PO file line={}: '{}'".format(lnum + 1, line))
|
||||
sys.exit(2)
|
||||
elif line.startswith("#") and "fuzzy" in line:
|
||||
state = "#"
|
||||
fuzzy = True
|
||||
elif line.startswith("#"):
|
||||
state = "#"
|
||||
else:
|
||||
state = ""
|
||||
if state == "" and state_last == "msgstr":
|
||||
fuzzy_in = fuzzy
|
||||
fuzzy = False
|
||||
# ready to report
|
||||
# print("I: ----------------------------------------------------------")
|
||||
if msgid_str == "" or msgstr_str == "" or "<" not in msgid_str:
|
||||
# notworth analyzing
|
||||
continue
|
||||
if not args.test_fuzzy and fuzzy_in:
|
||||
# test_fuzzy=*, fuzzy_in=False -> test
|
||||
# test_fuzzy=True, fuzzy_in=True -> test
|
||||
# test_fuzzy=False, fuzzy_in=True -> don't test
|
||||
continue
|
||||
# normalize
|
||||
msgid_str = msgid_str.replace("xl:href", "href").replace('\\"', '"')
|
||||
msgstr_str = msgstr_str.replace("xl:href", "href").replace('\\"', '"')
|
||||
if args.ignore_entity:
|
||||
msgid_str = msgid_str.replace("&", "_")
|
||||
msgstr_str = msgstr_str.replace("&", "_")
|
||||
# msgstr is not "" and msgid may have XML tag
|
||||
xml_msgid = ET.fromstring("<msg></msg>")
|
||||
xml_msgstr = ET.fromstring("<msg></msg>")
|
||||
err0_str = ""
|
||||
try:
|
||||
xml_msgid = ET.fromstring("<msg>" + msgid_str + "</msg>")
|
||||
except ET.ParseError as err0:
|
||||
valid_msgid = False
|
||||
# look for error position
|
||||
col0 = max(err0.position[1] - len("<msg>"), 0)
|
||||
err0_str = msgid_str[col0 : col0 + 20]
|
||||
except Exception as err0:
|
||||
valid_msgid = False
|
||||
print(f"err0 unexpected {err0=}, {type(err0)=}")
|
||||
else:
|
||||
valid_msgid = True
|
||||
err1_str = ""
|
||||
try:
|
||||
xml_msgstr = ET.fromstring("<msg>" + msgstr_str + "</msg>")
|
||||
except ET.ParseError as err1:
|
||||
valid_msgstr = False
|
||||
# look for error position
|
||||
col1 = max(err1.position[1] - len("<msg>"), 0)
|
||||
err1_str = msgstr_str[col1 : col1 + 20]
|
||||
except Exception as _:
|
||||
valid_msgstr = False
|
||||
else:
|
||||
valid_msgstr = True
|
||||
if valid_msgid and valid_msgstr:
|
||||
tags_msgid = collections.Counter(
|
||||
[element.tag for element in xml_msgid.iter()]
|
||||
)
|
||||
del tags_msgid["msg"]
|
||||
tags_msgstr = collections.Counter(
|
||||
[element.tag for element in xml_msgstr.iter()]
|
||||
)
|
||||
del tags_msgstr["msg"]
|
||||
if tags_msgid == tags_msgstr:
|
||||
# print("I: line={} valid XML and matched XML tags msgid={}".format(msgstr_lnum, tags_msgid))
|
||||
pass
|
||||
else:
|
||||
# line number should start at 1 like editor
|
||||
print(
|
||||
"E: line={} **UNMATCHED XML TAG: fuzzy={} tags_msgid={} tags_msgstr={}".format(
|
||||
msgstr_lnum + 1, fuzzy_in, tags_msgid, tags_msgstr
|
||||
)
|
||||
)
|
||||
if args.msg:
|
||||
print(" msgid = '{}'".format(msgid_str))
|
||||
print(" msgstr = '{}'".format(msgstr_str))
|
||||
error_count += 1
|
||||
else:
|
||||
# line number should start at 1 like editor
|
||||
print(
|
||||
"E: line={} **INVALID** XML: fuzzy={} error at msgid='{}' msgstr='{}' (truncated)".format(
|
||||
msgstr_lnum + 1, fuzzy_in, err0_str, err1_str
|
||||
)
|
||||
)
|
||||
if args.msg:
|
||||
print(" msgid = '{}'".format(msgid_str))
|
||||
print(" msgstr = '{}'".format(msgstr_str))
|
||||
error_count += 1
|
||||
state_last = state
|
||||
print("ERROR COUNT = {}".format(error_count))
|
||||
sys.exit(error_count)
|
||||
warn_count = 0
|
||||
for entry in po:
|
||||
if entry.fuzzy and not args.include_fuzzy:
|
||||
continue
|
||||
if entry.obsolete and not args.include_obsolete:
|
||||
continue
|
||||
xmlentry = XMLPOEntry(
|
||||
linenum=entry.linenum,
|
||||
tcomment=entry.tcomment,
|
||||
fuzzy=entry.fuzzy,
|
||||
obsolete=entry.obsolete,
|
||||
msgid=entry.msgid,
|
||||
msgstr=entry.msgstr,
|
||||
mask_entity=not args.expose_entity,
|
||||
mask_namespace=not args.expose_namespace,
|
||||
check_skip_tag_match=not args.ignore_skip_tag_match,
|
||||
)
|
||||
xmlentry.print_error()
|
||||
xmlentry.print_unmatched_tags()
|
||||
if (
|
||||
xmlentry.xml_msgid_err is not None
|
||||
or xmlentry.xml_msgstr_err is not None
|
||||
):
|
||||
error_count += 1
|
||||
if xmlentry.is_unmatched_xml() is True:
|
||||
warn_count += 1
|
||||
print("E: XML markup error counts = {}".format(error_count))
|
||||
print("W: XML unmatched tag counts = {}".format(warn_count))
|
||||
sys.exit(error_count + warn_count)
|
||||
|
||||
|
||||
#######################################################################
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue