-
Notifications
You must be signed in to change notification settings - Fork 0
/
BasicFunctions.py
163 lines (134 loc) · 5.75 KB
/
BasicFunctions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# Description: This file contains the basic functions used for the SOGM model
import torch
import numpy as np
import json
# Check whether a value can be parsed as a float
def isfloat(value):
    """Return True if ``float(value)`` succeeds, otherwise False.

    :param value: object to probe, typically a single word (str).
    :return: bool. Strings accepted by ``float`` such as ``"1e3"``, ``"nan"``
        or ``"inf"`` also count as floats.
    """
    try:
        float(value)
    except (TypeError, ValueError):
        # ValueError: malformed numeric string; TypeError: unsupported type
        # such as None or a list. Both simply mean "not a float".
        return False
    return True
# Isolate an unmatched parenthesis from the words of a sentence
def isoparenthesis(wordlist):
    """Strip lone parentheses from words and append them as separate tokens.

    A word that contains only '(' (or only ')') has every such character
    removed, and a single '(' (or ')') token is added at the END of the list.
    Words that contain both kinds of parenthesis, or none, are left untouched.

    :param wordlist: list of str; it is modified in place.
    :return: the same list object that was passed in.
    """
    detached = []
    for position, token in enumerate(wordlist):
        has_open = '(' in token
        has_close = ')' in token
        if has_open == has_close:
            # Either both kinds are present or neither: keep the word as is.
            continue
        paren = '(' if has_open else ')'
        wordlist[position] = token.replace(paren, '')
        detached.append(paren)
    # Added only after the scan so the new tokens are never re-examined.
    wordlist.extend(detached)
    return wordlist
# Convert a 3D tensor index to the matching 2D tensor index
def getreshapeindices(original_indices, shape_of_original):
    """Map an index (i, j, k) to [i * shape[1] + j, k].

    This is the index an element gets when the first two axes of the original
    tensor are merged into one.

    :param original_indices: indexable holding the three indices (i, j, k).
    :param shape_of_original: indexable shape; only element 1 is read.
    :return: two-element list [merged_row, k].
    """
    outer = original_indices[0]
    inner = original_indices[1]
    last = original_indices[2]
    merged_row = outer * shape_of_original[1] + inner
    return [merged_row, last]
# Convert text to indices
def words2indices(propdata, maxsentences=42, maxwords=16, pad_index=509):
    """Encode the sentences of every sample as a float tensor of word indices.

    The vocabulary is read from 'lib/word2idx_brand.json' on every call.
    Each sentence is split on whitespace and passed through isoparenthesis.
    Tokens are then encoded as follows:
      * a numeric token >= 0 is stored as its negated value;
      * a negative numeric token only triggers a printed warning, and the
        slot keeps ``pad_index``;
      * any other token is lower-cased and looked up in the vocabulary
        (a missing word raises KeyError).

    :param propdata: sequence of samples, each a sequence of sentence strings.
    :param maxsentences: size of the sentence axis of the output.
    :param maxwords: size of the word axis of the output.
    :param pad_index: value that fills every unused slot.
    :return: tensor of shape (len(propdata), maxsentences, maxwords).
    """
    with open('lib/word2idx_brand.json', 'r') as file:
        word2idx = json.load(file)
    # Start fully padded; only slots that hold a token are overwritten.
    propdata_idx = torch.ones(len(propdata), maxsentences, maxwords) * pad_index
    for sample_pos, sentences in enumerate(propdata):
        for sentence_pos, sentence in enumerate(sentences):
            tokens = isoparenthesis(sentence.split())
            for token_pos, token in enumerate(tokens):
                if not isfloat(token):
                    propdata_idx[sample_pos, sentence_pos, token_pos] = float(word2idx[token.lower()])
                    continue
                number = float(token)
                if number >= 0:
                    # Numbers are stored negated, unlike the vocabulary
                    # indices, which are stored as read from the JSON file.
                    propdata_idx[sample_pos, sentence_pos, token_pos] = -number
                else:
                    print("warning: negative number: ", number)
    return propdata_idx
def rmse(y_true, y_pred):
    """Root-mean-square error over the entries where 0 < y_true < 1.

    :param y_true: reference tensor; it also defines which entries count.
    :param y_pred: predicted tensor, indexed with the mask built from y_true.
    :return: 0-dim tensor holding the RMSE (NaN when no entry is selected).
    """
    # Only reference values strictly inside (0, 1) take part.
    valid = (y_true > 0) & (y_true < 1)
    residual = y_true[valid] - y_pred[valid]
    return torch.sqrt(torch.mean(residual ** 2))
def getRMSE(tensor1, tensor2):
    """Compute the masked RMSE of every signal (slice along axis 0).

    Each pair ``tensor1[i]``, ``tensor2[i]`` is passed to ``rmse``, with
    ``tensor1`` as the reference argument.

    :param tensor1: reference tensor.
    :param tensor2: predicted tensor with the same shape as tensor1.
    :return: 1-D tensor with one RMSE per signal.
    :raises ValueError: if the two shapes differ.
    """
    if tensor1.shape != tensor2.shape:
        raise ValueError("Tensors must be of the same shape.")
    rmse_values = torch.zeros(tensor1.shape[0])
    for row, (reference, estimate) in enumerate(zip(tensor1, tensor2)):
        rmse_values[row] = rmse(reference, estimate)
    return rmse_values
def getR(tensor1, tensor2):
    """Compute the Pearson correlation of every signal (slice along axis 0).

    Only entries where ``0 < tensor1[i] < 1`` are used for signal ``i``.

    :param tensor1: reference tensor (or array); it also defines the mask.
    :param tensor2: predicted tensor (or array) with the same shape.
    :return: 1-D float64 tensor with one correlation coefficient per signal
        (NaN where the selected entries do not define a correlation).
    :raises ValueError: if the two shapes differ.
    """
    if tensor1.shape != tensor2.shape:
        raise ValueError("Tensors must be of the same shape.")
    # np.corrcoef cannot consume tensors that require grad or live on an
    # accelerator, so convert both inputs to detached host arrays first.
    reference = torch.as_tensor(tensor1).detach().cpu().numpy()
    estimate = torch.as_tensor(tensor2).detach().cpu().numpy()
    num_signals = reference.shape[0]
    correlation_coefficients = np.zeros(num_signals)
    for i in range(num_signals):
        mask = (reference[i] > 0) & (reference[i] < 1)
        # Off-diagonal entry of the 2x2 correlation matrix.
        correlation_coefficients[i] = np.corrcoef(reference[i][mask], estimate[i][mask])[0, 1]
    return torch.tensor(correlation_coefficients)
def setProp(specbrand, clay, silt, SOM, nitrogen, CEC, OC, tP, pH_w, Fe, dens, ec=-1, caco3=-1, tc=-1):
    """Build the list of property sentences describing one soil sample.

    The list always starts with 'Samples : Soil'. Every numeric property is
    included only when its value is >= 0, so a negative value (such as the
    -1 defaults) leaves that property out. A sand-content sentence is derived
    as ``100 - clay - silt`` when both clay and silt are given.

    :param specbrand: spectrometer name (str), or None to omit it.
    :param clay: clay content, reported in %.
    :param silt: silt content, reported in %.
    :param SOM: soil organic matter, reported in g/kg.
    :param nitrogen: total nitrogen content, reported in g/kg.
    :param CEC: cation exchange capacity, reported in cmol(+)/kg.
    :param OC: organic carbon content, reported in g/kg.
    :param tP: total phosphorus content, reported in mg/kg.
    :param pH_w: pH measured from water solution (no unit).
    :param Fe: iron content, reported in mg/kg.
    :param dens: soil bulk density, reported in g/cm3.
    :param ec: electrical conductivity, reported in mS/m.
    :param caco3: CaCO3 content, reported in g/kg.
    :param tc: total carbon content, reported in %.
    :return: list of str, one sentence per included property.
    """
    prop = ['Samples : Soil']
    if specbrand is not None:
        prop.append('Spectrometer : ' + specbrand)

    def add_if_given(label, value, unit=''):
        # Negative values mean "not provided" and are skipped.
        if value >= 0:
            prop.append(label + ' : ' + str(value) + unit)

    add_if_given('Clay content', clay, ' %')
    add_if_given('Silt content', silt, ' %')
    # Sand is not an input: it is the remainder once clay and silt are known.
    if silt >= 0 and clay >= 0:
        prop.append('Sand content : ' + str(100 - clay - silt) + ' %')

    remaining = (
        ('Soil organic matter', SOM, ' g/kg'),
        ('Total nitrogen content', nitrogen, ' g/kg'),
        ('Cation exchange capacity', CEC, ' cmol(+)/kg'),
        ('Organic carbon content', OC, ' g/kg'),
        ('Total phosphorus content', tP, ' mg/kg'),
        ('pH measured from water solution', pH_w, ''),
        ('Iron content', Fe, ' mg/kg'),
        ('Soil bulk density', dens, ' g/cm3'),
        ('Electrical conductivity', ec, ' mS/m'),
        ('CaCO3 content', caco3, ' g/kg'),
        ('Total carbon content', tc, ' %'),
    )
    for label, value, unit in remaining:
        add_if_given(label, value, unit)
    return prop
def saveSpectra2heliosxml(spectra_data, filename):
    """
    Save multiple spectra to an XML file.

    Each spectrum becomes one <globaldata_vec2> element inside a <helios>
    root, with one "wavelength value" line per sample. Values are written
    with six decimals.

    :param spectra_data: list of tuples, where each tuple contains:
        - wavelengths: 1D array-like of wavelengths
        - spectrum: 1D array-like of spectrum data
        - label: str, the label to use in the XML file
    :param filename: str, the name of the file to save
    """
    # Local import keeps this fix independent of the module's import block.
    from xml.sax.saxutils import escape

    # Collect the pieces and join once rather than growing a string with +=.
    pieces = ['<helios>\n\n\t<!-- -->\n']
    for wavelengths, spectrum, label in spectra_data:
        # The label lands inside a double-quoted attribute, so &, <, > and "
        # must be escaped to keep the document well-formed.
        safe_label = escape(str(label), {'"': '&quot;'})
        pieces.append(f'\t<globaldata_vec2 label="{safe_label}">\n')
        # zip stops at the shorter of the two sequences.
        pieces.extend(f"\t\t{w} {s:.6f}\n" for w, s in zip(wavelengths, spectrum))
        pieces.append("\t</globaldata_vec2>\n\n")
    pieces.append('</helios>')
    # Without an XML declaration a parser assumes UTF-8, so write UTF-8
    # explicitly instead of the platform default encoding.
    with open(filename, "w", encoding="utf-8") as file:
        file.write("".join(pieces))