Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 31 additions & 27 deletions src/cluecode/copyrights.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,9 @@ def detect(self,
'IS', 'HELD',
])

# then walk the parse_tree, collecting copyrights, years and authors
# walk parse tree
for tree_node in parse_tree:

if not isinstance(tree_node, Tree):
if TRACE:
logger_debug(f'CopyrightDetector: parse_tree node: {tree_node}')
Expand All @@ -336,6 +337,7 @@ def detect(self,
tree_node_label = tree_node.label

if (include_copyrights or include_holders) and 'COPYRIGHT' in tree_node_label:

copyrght = build_detection_from_node(
node=tree_node,
cls=CopyrightDetection,
Expand All @@ -344,40 +346,41 @@ def detect(self,
refiner=refine_copyright,
)

if TRACE or TRACE_DEEP:
logger_debug(f'CopyrightDetector: final copyright: {copyrght}')
if not copyrght:
continue

if copyrght:
if include_copyrights:
yield copyrght
holder = None
if include_holders:
holder = build_detection_from_node(
node=tree_node,
cls=HolderDetection,
ignored_labels=non_holder_labels,
refiner=refine_holder,
)

if include_holders:
# By default we strip email and urls from holders ....
if not holder:
holder = build_detection_from_node(
node=tree_node,
cls=HolderDetection,
ignored_labels=non_holder_labels,
ignored_labels=non_holder_labels_mini,
refiner=refine_holder,
)

if not holder:
# ... but if we have no holder, we try again and
# this time we keep email and URLs for holders using
# "non_holder_labels_mini" as an "ignores" label set
holder = build_detection_from_node(
node=tree_node,
cls=HolderDetection,
ignored_labels=non_holder_labels_mini,
refiner=refine_holder,
)
if (
copyrght.copyright
and re.match(r'^Copyright\s+\d{4}$', copyrght.copyright.strip())
and not holder
):
continue

if holder:
if TRACE:
logger_debug(f'CopyrightDetector: holders: {holder}')
if include_copyrights:
yield copyrght

yield holder
if include_holders and holder:
yield holder

elif include_authors and tree_node_label == 'AUTHOR':

author = build_detection_from_node(
node=tree_node,
cls=AuthorDetection,
Expand All @@ -388,10 +391,9 @@ def detect(self,
if author:
if TRACE:
logger_debug(f'CopyrightDetector: detected authors: {author}')

yield author


def get_tokens(numbered_lines, splitter=re.compile(r'[\t =;]+').split):
"""
Return an iterable of pygmars.Token built from a ``numbered_lines`` iterable
Expand Down Expand Up @@ -3559,6 +3561,7 @@ def refine_copyright(c):
c = strip_suffixes(c, suffixes=COPYRIGHTS_SUFFIXES)
c = strip_trailing_period(c)
c = c.strip("'")
c = re.sub(r'\b(\d{4})-\$', r'\1', c)
return c.strip()


Expand Down Expand Up @@ -3620,8 +3623,7 @@ def refine_author(a):
"""
if not a:
return
# FIXME: we could consider splitting comma-separated lists such as
# gthomas, sorin@netappi.com, andrew.lunn@ascom.che.g.

a = remove_some_extra_words_and_punct(a)
a = refine_names(a, prefixes=AUTHORS_PREFIXES)
a = a.strip()
Expand All @@ -3633,10 +3635,12 @@ def refine_author(a):
a = refine_names(a, prefixes=AUTHORS_PREFIXES)
a = a.strip()
a = a.strip('+-')

if a and a.lower() not in AUTHORS_JUNK and not a.startswith(AUTHORS_JUNK_PREFIX):
return a



def refine_names(s, prefixes):
"""
Refine a detected ``s`` name string from an author or holder.
Expand Down