Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatically detect character encoding of YAML files and ignore files #630

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
decoder: Autodetect encoding for ignore-from-file
Before this change, yamllint would decode files on the ignore-from-file
list using open()’s default encoding [1][2]. This can cause decoding to
fail in some situations (see the previous commit message for details).

This change makes yamllint automatically detect the encoding for files
on the ignore-from-file list. It uses the same algorithm that it uses
for detecting the encoding of YAML files, so the same limitations apply:
files must use UTF-8, UTF-16 or UTF-32 and they must begin with either a
byte order mark or an ASCII character.

[1]: <https://docs.python.org/3.12/library/fileinput.html#fileinput.input>
[2]: <https://docs.python.org/3.12/library/fileinput.html#fileinput.FileInput>
  • Loading branch information
Jayman2000 committed Nov 29, 2024
commit 4f97d1f5447038bb5c08fcf9d8f3ea5d851f8c98
4 changes: 4 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,10 @@ or:

.. note:: However, this is mutually exclusive with the ``ignore`` key.

.. note:: Files on the ``ignore-from-file`` list must be encoded in UTF-8,
UTF-16 or UTF-32. Additionally, they must start with either an ASCII
character or a byte order mark.

If you need to know the exact list of files that yamllint would process,
without really linting them, you can use ``--list-files``:

Expand Down
49 changes: 48 additions & 1 deletion tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,20 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import itertools
import os
import shutil
import sys
import tempfile
import unittest
from io import StringIO

from tests.common import build_temp_workspace, RunContext
from tests.common import (
build_temp_workspace,
register_test_codecs,
RunContext,
unregister_test_codecs,
)

from yamllint import cli, config
from yamllint.config import YamlLintConfigError
Expand Down Expand Up @@ -820,3 +826,44 @@ def test_run_with_ignore_on_ignored_file(self):
sys.stdout.getvalue().strip(),
'file-at-root.yaml:4:17: [error] trailing spaces (trailing-spaces)'
)

def create_ignore_file(self, text, codec):
    """Write ``text`` encoded with ``codec`` to an ignore file in ``self.wd``.

    The file is named ``<codec>.ignore`` and is removed again by a cleanup
    registered on the test case.

    :param text: content of the ignore file (one pattern per line)
    :param codec: name of the codec used to encode ``text``
    :return: path of the file that was created
    """
    path = os.path.join(self.wd, f'{codec}.ignore')
    with open(path, 'wb') as f:
        f.write(text.encode(codec))
    # addCleanup() forwards extra arguments to the callable, so no lambda
    # wrapper is needed. Registered only after the file exists so that the
    # cleanup never tries to remove a file that was not created.
    self.addCleanup(os.remove, path)
    return path

def test_ignored_from_file_with_multiple_encodings(self):
    """Run yamllint with ignore files written in many different encodings.

    One ignore file is created per codec below, all of them are listed
    under ``ignore-from-file``, and the run over ``.`` is expected to exit
    with code 0.
    """
    register_test_codecs()
    self.addCleanup(unregister_test_codecs)

    # (patterns, codec) pairs; each pair becomes one ignore file.
    patterns_by_codec = (
        ('bin/file.lint-me-anyway.yaml\n', 'utf_32_be'),
        ('bin/file.yaml\n', 'utf_32_be_sig'),
        ('file-at-root.yaml\n', 'utf_32_le'),
        ('file.dont-lint-me.yaml\n', 'utf_32_le_sig'),

        ('ign-dup/file.yaml\n', 'utf_16_be'),
        ('ign-dup/sub/dir/file.yaml\n', 'utf_16_be_sig'),
        ('ign-trail/file.yaml\n', 'utf_16_le'),
        ('include/ign-dup/sub/dir/file.yaml\n', 'utf_16_le_sig'),

        ('s/s/ign-trail/file.yaml\n', 'utf_8'),
        (
            's/s/ign-trail/s/s/file.yaml\n'
            's/s/ign-trail/s/s/file2.lint-me-anyway.yaml\n'
            '.yamllint\n',

            'utf_8_sig'
        ),
    )
    # Lazy: the files are actually written when the paths are joined below.
    ignore_paths = itertools.starmap(
        self.create_ignore_file,
        patterns_by_codec
    )
    conf = ''.join((
        '---\n',
        'extends: default\n',
        f'ignore-from-file: [{", ".join(ignore_paths)}]\n',
    ))

    with self.assertRaises(SystemExit) as caught:
        cli.run(('-d', conf, '.'))
    self.assertEqual(caught.exception.code, 0)
30 changes: 30 additions & 0 deletions tests/test_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,16 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import codecs
import itertools
import unittest

from tests.common import (
UTF_CODECS,
encoding_detectable,
is_test_codec,
register_test_codecs,
temp_workspace,
temp_workspace_with_files_in_many_codecs,
test_codec_built_in_equivalent,
unregister_test_codecs,
uses_bom,
Expand Down Expand Up @@ -450,3 +453,30 @@ def test_auto_decode_with_strings_encoded_at_runtime(self):
+ "decoding error."
)
)

def perform_lines_in_file_test(self, strings):
    """Check that ``decoder.lines_in_files`` yields ``strings`` per file.

    A workspace is built with ``temp_workspace_with_files_in_many_codecs``
    whose files all contain ``strings`` joined by newlines. The lines read
    back from all of those files must be ``strings`` repeated in order.

    :param strings: the lines that each file in the workspace contains
    """
    workspace = temp_workspace_with_files_in_many_codecs(
        '{}',
        '\n'.join(strings)
    )
    with temp_workspace(workspace):
        # Read everything while the workspace still exists.
        lines = list(decoder.lines_in_files(workspace.keys()))

    # zip() stops at its shorter input, so without this check the test
    # would pass even if too few (or no) lines were yielded.
    # NOTE(review): assumes every file yields exactly one line per string.
    self.assertEqual(len(lines), len(strings) * len(workspace))
    for expected, actual in zip(itertools.cycle(strings), lines):
        self.assertEqual(expected, actual)

def test_lines_in_file(self):
    """Run the lines-in-files check on two sets of sample strings."""
    sample_sets = (
        (
            "YAML",
            "ⓎⒶⓂⓁ",
            "🅨🅐🅜🅛",
            "YAML"
        ),
        (
            "𝐘𝐀𝐌𝐋",
            "𝖄𝕬𝕸𝕷",
            "𝒀𝑨𝑴𝑳",
            "𝓨𝓐𝓜𝓛"
        ),
    )
    for strings in sample_sets:
        self.perform_lines_in_file_test(strings)
14 changes: 8 additions & 6 deletions yamllint/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import fileinput
import os.path

import pathspec
Expand Down Expand Up @@ -110,8 +109,10 @@ def parse(self, raw_content):
raise YamlLintConfigError(
'invalid config: ignore-from-file should contain '
'filename(s), either as a list or string')
with fileinput.input(conf['ignore-from-file']) as f:
self.ignore = pathspec.PathSpec.from_lines('gitwildmatch', f)
self.ignore = pathspec.PathSpec.from_lines(
'gitwildmatch',
decoder.lines_in_files(conf['ignore-from-file'])
)
elif 'ignore' in conf:
if isinstance(conf['ignore'], str):
self.ignore = pathspec.PathSpec.from_lines(
Expand Down Expand Up @@ -164,9 +165,10 @@ def validate_rule_conf(rule, conf):
raise YamlLintConfigError(
'invalid config: ignore-from-file should contain '
'valid filename(s), either as a list or string')
with fileinput.input(conf['ignore-from-file']) as f:
conf['ignore'] = pathspec.PathSpec.from_lines(
'gitwildmatch', f)
conf['ignore'] = pathspec.PathSpec.from_lines(
'gitwildmatch',
decoder.lines_in_files(conf['ignore-from-file'])
)
elif ('ignore' in conf and not isinstance(
conf['ignore'], pathspec.pathspec.PathSpec)):
if isinstance(conf['ignore'], str):
Expand Down