Skip to content

Commit

Permalink
database: Add support for Kconfig and Devicetree files
Browse files Browse the repository at this point in the history
This introduces the idea of file families: each ident is identified by its
family and can only be referenced from compatible families.

For example:
- A Kconfig ident can be referenced in a C file but not in a Devicetree file.
- A Devicetree ident is only referenced in Devicetree files.
- A C ident is only referenced in C files.

Kconfig idents are defined without the CONFIG_ at the beginning.
We add it while indexing for an easier processing of other files.

Signed-off-by: Maxime Chretien <maxime.chretien@bootlin.com>
  • Loading branch information
MaximeChretien committed May 22, 2020
1 parent bdcb386 commit 72571fb
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 31 deletions.
47 changes: 31 additions & 16 deletions data.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
##################################################################################

defTypeR = {
'c': 'config',
'd': 'define',
'e': 'enum',
'E': 'enumerator',
Expand All @@ -50,31 +51,43 @@

class DefList:
'''Stores associations between a blob ID, a type (e.g., "function"),
and a line number.'''
def __init__(self, data=b''):
self.data = data
a line number and a file family.
Also stores in which families the ident exists for faster tests.'''
def __init__(self, data=b'#'):
self.data, self.families = data.split(b'#')

def iter(self, dummy=False):
for p in self.data.split(b','):
p = re.search(b'(\d*)(\w)(\d*)', p)
id, type, line = p.groups()
p = re.search(b'(\d*)(\w)(\d*)(\w)', p)
id, type, line, family = p.groups()
id = int(id)
type = defTypeR [type.decode()]
line = int(line)
yield(id, type, line)
family = family.decode()
yield(id, type, line, family)
if dummy:
yield(maxId, None, None)
yield(maxId, None, None, None)

def append(self, id, type, line):
def append(self, id, type, line, family):
if type not in defTypeD:
return
p = str(id) + defTypeD[type] + str(line)
p = str(id) + defTypeD[type] + str(line) + family
if self.data != b'':
p = ',' + p
self.data += p.encode()

def pack(self):
return self.data
return self.data + b'#' + self.families

def add_family(self, family):
family = family.encode()
if not family in self.families.split(b','):
if self.families != b'':
family = b',' + family
self.families += family

def get_families(self):
return self.families.decode().split(',')

class PathList:
'''Stores associations between a blob ID and a file path.
Expand All @@ -100,7 +113,8 @@ def pack(self):
return self.data

class RefList:
'''Stores a mapping from blob ID to list of lines.'''
'''Stores a mapping from blob ID to list of lines
and the corresponding family.'''
def __init__(self, data=b''):
self.data = data

Expand All @@ -110,16 +124,17 @@ def iter(self, dummy=False):
while s.tell() < size:
line = s.readline()
line = line [:-1]
b,c = line.split(b':')
b,c,d = line.split(b':')
b = int(b.decode())
c = c.decode()
yield(b, c)
d = d.decode()
yield(b, c, d)
s.close()
if dummy:
yield(maxId, None)
yield(maxId, None, None)

def append(self, id, lines):
p = str(id) + ':' + lines + '\n'
def append(self, id, lines, family):
p = str(id) + ':' + lines + ':' + family + '\n'
self.data += p.encode()

def pack(self):
Expand Down
28 changes: 25 additions & 3 deletions lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,28 @@ def getDataDir():
def currentProject():
return os.path.basename(os.path.dirname(getDataDir()))

def hasSupportedExt(filename):
ext = os.path.splitext(filename)[1]
return ext.lower() in ['.c', '.cc', '.cpp', '.c++', '.cxx', '.h', '.s']
def getFileFamily(filename):
    '''Return the family letter of a file, or None if it has no family.

    filename may be a bare file name or a path; only its last component
    is examined. The result is 'C' for C-like and assembly sources, 'D'
    for Devicetree sources, 'K' for Kconfig files and None otherwise.
    '''
    # Strip any directory part first: the Kconfig check below looks at the
    # beginning of the name and would otherwise see the directory prefix.
    name, ext = os.path.splitext(os.path.basename(filename))
    name = name.lower()
    ext = ext.lower()

    if ext in ('.c', '.cc', '.cpp', '.c++', '.cxx', '.h', '.s'):
        return 'C'  # C file family and ASM
    if ext in ('.dts', '.dtsi'):
        return 'D'  # Devicetree files
    if name.startswith('kconfig') and ext != '.rst':
        # Some files are named like Kconfig-nommu, so only the prefix is checked.
        # Documentation files that happen to be named kconfig are excluded.
        return 'K'  # Kconfig files
    return None

# For each requested family (the key), the families accepted as compatible.
compatibility_list = {
    'C': ['C', 'K'],
    'K': ['K'],
    'D': ['D'],
}


def compatibleFamily(file_family, requested_family):
    '''Check whether families are compatible.

    file_family is a family, or a list of different families; each
    candidate is looked up in it with the `in` operator.
    requested_family is the key used to choose the right list in
    compatibility_list.

    Returns True when at least one family listed for requested_family is
    found in file_family, False otherwise. Raises KeyError when
    requested_family is not a key of compatibility_list.
    '''
    return any(item in file_family for item in compatibility_list[requested_family])
50 changes: 47 additions & 3 deletions script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,15 @@ tokenize_file()
ref="$v:`denormalize $opt2`"
fi

if [ $opt3 = "D" ]; then #Don't cut around '-' in devicetrees
regex='s%((/\*.*?\*/|//.*?\001|[^'"'"']"(\\.|.)*?"|# *include *<.*?>|[^\w-])+)([\w-]+)?%\1\n\4\n%g'
else
regex='s%((/\*.*?\*/|//.*?\001|[^'"'"']"(\\.|.)*?"|# *include *<.*?>|\W)+)(\w+)?%\1\n\4\n%g'
fi

git cat-file blob $ref 2>/dev/null |
tr '\n' '\1' |
perl -pe 's%((/\*.*?\*/|//.*?\001|[^'"'"']"(\\.|.)*?"|# *include *<.*?>|\W)+)(\w+)?%\1\n\4\n%g' |
perl -pe "$regex" |
head -n -1
}

Expand Down Expand Up @@ -136,12 +142,49 @@ untokenize()
}

# Dispatch definition parsing on the file family held in $opt3:
# "C" runs parse_defs_C, "K" runs parse_defs_K, "D" runs parse_defs_D.
# Any other value matches no branch, so nothing is run.
parse_defs()
{
case $opt3 in
"C")
parse_defs_C
;;
"K")
parse_defs_K
;;
"D")
parse_defs_D
;;
esac
}

# Print one "<field1> <field2> <field3>" line per ctags entry found in
# blob $opt1 (the first three columns of the `ctags -x` output).
# The blob is first written into a fresh temporary directory under the
# name $opt2, and both the file and the directory are removed afterwards.
# Entries starting with "operator " or containing CONFIG_ are dropped;
# grep -a makes grep treat the input as text.
# NOTE(review): --kinds-c=+p-m presumably enables prototypes and disables
# members for C; confirm against the ctags documentation.
parse_defs_C()
{
tmp=`mktemp -d`
full_path=$tmp/$opt2
git cat-file blob "$opt1" > "$full_path"
ctags -x --kinds-c=+p-m "$full_path" |
grep -avE "^operator |CONFIG_" |
awk '{print $1" "$2" "$3}'
rm "$full_path"
rmdir $tmp
}

# Print one "CONFIG_<field1> <field2> <field3>" line per ctags entry found
# in blob $opt1, with ctags forced to its kconfig language.
# The CONFIG_ prefix is added to the first column here, at indexing time.
# The blob is first written into a fresh temporary directory under the
# name $opt2, and both the file and the directory are removed afterwards.
parse_defs_K()
{
tmp=`mktemp -d`
full_path=$tmp/$opt2
git cat-file blob "$opt1" > "$full_path"
ctags -x --language-force=kconfig "$full_path" |
awk '{print "CONFIG_"$1" "$2" "$3}'
rm "$full_path"
rmdir $tmp
}

parse_defs_D()
{
tmp=`mktemp -d`
full_path=$tmp/$opt2
git cat-file blob "$opt1" > "$full_path"
ctags -x --c-kinds=+p-m "$full_path" |
grep -av "^operator " |
ctags -x --language-force=dts "$full_path" |
awk '{print $1" "$2" "$3}'
rm "$full_path"
rmdir $tmp
Expand Down Expand Up @@ -171,6 +214,7 @@ test $# -gt 0 || set help
cmd=$1
opt1=$2
opt2=$3
opt3=$4
shift

denormalize()
Expand Down
28 changes: 19 additions & 9 deletions update.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,10 @@ def update_definitions(self, idxes):
hash = db.hash.get(idx)
filename = db.file.get(idx)

if not lib.hasSupportedExt(filename): continue
family = lib.getFileFamily(filename);
if family == None: continue

lines = scriptLines('parse-defs', hash, filename)
lines = scriptLines('parse-defs', hash, filename, family)
for l in lines:
ident, type, line = l.split(b' ')
type = type.decode()
Expand All @@ -170,7 +171,8 @@ def update_definitions(self, idxes):
else:
obj = data.DefList()

obj.append(idx, type, line)
obj.add_family(family)
obj.append(idx, type, line, family)
if verbose:
print(f"def {type} {ident} in #{idx} @ {line}")
with defs_lock:
Expand Down Expand Up @@ -210,16 +212,23 @@ def update_references(self, idxes):
hash = db.hash.get(idx)
filename = db.file.get(idx)

if not lib.hasSupportedExt(filename): continue
family = lib.getFileFamily(filename)
if family == None: continue

tokens = scriptLines('tokenize-file', '-b', hash)
prefix = b''
# Kconfig values are saved as CONFIG_<value>
if family == 'K':
prefix = b'CONFIG_'

tokens = scriptLines('tokenize-file', '-b', hash, family)
even = True
line_num = 1
idents = {}
for tok in tokens:
even = not even
if even:

tok = prefix + tok

with defs_lock:
if db.defs.exists(tok) and lib.isIdent(tok):
if tok in idents:
Expand All @@ -236,7 +245,7 @@ def update_references(self, idxes):
else:
obj = data.RefList()

obj.append(idx, lines)
obj.append(idx, lines, family)
if verbose:
print(f"ref: {ident} in #{idx} @ {lines}")
db.refs.put(ident, obj)
Expand Down Expand Up @@ -274,7 +283,8 @@ def update_doc_comments(self, idxes):
hash = db.hash.get(idx)
filename = db.file.get(idx)

if not lib.hasSupportedExt(filename): continue
family = lib.getFileFamily(filename)
if family == None: continue

lines = scriptLines('parse-docs', hash, filename)
for l in lines:
Expand All @@ -286,7 +296,7 @@ def update_doc_comments(self, idxes):
else:
obj = data.RefList()

obj.append(idx, str(line))
obj.append(idx, str(line), family)
if verbose:
print(f"doc: {ident} in #{idx} @ {line}")
db.docs.put(ident, obj)
Expand Down

0 comments on commit 72571fb

Please sign in to comment.