-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
20220620-blog
- Loading branch information
Showing
4 changed files
with
286 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
--- | ||
title: hadoop作业查询与关闭 | ||
date: 2022-06-20 23:02:46 | ||
tags: | ||
- hadoop | ||
- yarn | ||
--- | ||
|
||
# hadoop作业的查询和关闭 | ||
|
||
|
||
|
||
- ### hadoop version < 2.3.0 | ||
|
||
查看正在运行的 Hadoop 任务: | ||
|
||
> hadoop job -list

关闭 Hadoop 任务进程:
|
||
> hadoop job -kill $jobId

组合以上两条命令就可以实现 kill 掉指定用户的 job
|
||
```shell | ||
# Kill every job that belongs to "username": list the jobs, keep the lines that
# mention the user, take the first column and keep only real job ids.
for i in $(hadoop job -list | grep -w username | awk '{print $1}' | grep job_); do
    hadoop job -kill "$i"
done
``` | ||
|
||
username 就是你希望关闭 Hadoop 任务的用户 | ||
|
||
|
||
|
||
- ### hadoop version >= 2.3.0 | ||
|
||
查看正在运行的 Hadoop 任务: | ||
|
||
> yarn application -list

关闭 Hadoop 任务进程:
|
||
> yarn application -kill $ApplicationId |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
--- | ||
title: hadoop磁盘空间清理 | ||
date: 2022-06-20 23:02:59 | ||
tags: | ||
- hadoop | ||
- linux | ||
--- | ||
|
||
|
||
|
||
查看linux磁盘空间大文件 | ||
|
||
> du -h / --max-depth=5 | sort -hr | head -n 10

查看 hadoop 大文件
|
||
> hdfs dfs -du -h / | ||
分析其中占用空间过多的文件是否可以删除 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
--- | ||
title: hive-ip地理信息查询 | ||
date: 2022-06-20 23:04:06 | ||
tags: | ||
- hive | ||
--- | ||
|
||
# HIVE UDF IP查询 | ||
|
||
|
||
|
||
利用 hive-udf 自定义 IP 查询函数 | ||
|
||
借助 ipip 提供的 ipdb | ||
|
||
<https://www.ipip.net/product/ip.html#ipv4city>

将 .ipdb 文件放在 resources 目录下
|
||
代码如下 | ||
|
||
```java | ||
import net.ipip.ipdb.City; | ||
import org.apache.hadoop.hive.ql.exec.UDFArgumentException; | ||
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; | ||
import org.apache.hadoop.hive.ql.metadata.HiveException; | ||
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; | ||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; | ||
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; | ||
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* @description: ip查询城市 | ||
* @author: xxzuo | ||
* @email: 1293378490@qq.com | ||
**/ | ||
public class IpLocationCity extends GenericUDF { | ||
private static City IPDB; | ||
private transient StringObjectInspector allCgi; | ||
|
||
/** | ||
* Initialize this GenericUDF. This will be called once and only once per | ||
* GenericUDF instance. | ||
* | ||
* @param arguments The ObjectInspector for the arguments | ||
* @return The ObjectInspector for the return value | ||
* @throws UDFArgumentException Thrown when arguments have wrong types, wrong length, etc. | ||
*/ | ||
@Override | ||
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { | ||
ObjectInspector arg = arguments[0]; | ||
if (arguments.length != 1) { | ||
throw new UDFArgumentLengthException( | ||
"The operator 'SubstrCgi' accepts one arguments."); | ||
} | ||
try { | ||
IPDB = new City(this.getClass().getResourceAsStream("/ipipfree.ipdb")); | ||
} catch (IOException e) { | ||
} | ||
this.allCgi = (StringObjectInspector) arg; | ||
return PrimitiveObjectInspectorFactory.javaStringObjectInspector; | ||
} | ||
|
||
/** | ||
* Evaluate the GenericUDF with the arguments. | ||
* | ||
* @param arguments The arguments as DeferedObject, use DeferedObject.get() to get the | ||
* actual argument Object. The Objects can be inspected by the | ||
* ObjectInspectors passed in the initialize call. | ||
* @return The | ||
*/ | ||
@Override | ||
public Object evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException { | ||
String cgi = allCgi.getPrimitiveJavaObject(arguments[0].get()); | ||
if(null == cgi) { | ||
return null; | ||
} | ||
String ipInfo = ""; | ||
try { | ||
ipInfo = IPDB.find(cgi.toString(), "CN")[2]; | ||
} | ||
catch (Exception e) { | ||
} | ||
return ipInfo; | ||
} | ||
|
||
/** | ||
* Get the String to be displayed in explain. | ||
* | ||
* @param children | ||
*/ | ||
@Override | ||
public String getDisplayString(String[] children) { | ||
return "Usage: SubstrCgi(String cgi)"; | ||
} | ||
} | ||
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
--- | ||
title: python按行切割文件 | ||
date: 2022-06-20 23:03:47 | ||
tags: | ||
- python | ||
--- | ||
|
||
# 利用python 按行分割文件 | ||
|
||
代码如下 | ||
|
||
```python | ||
import os | ||
import time | ||
import tkinter as tk | ||
from tkinter import filedialog | ||
from tkinter.messagebox import showinfo, showwarning, showerror | ||
|
||
|
||
def mkSubFile(lines, head, srcName, sub):
    """Write one chunk of the source file into its own numbered file.

    The output path is ``srcName`` with ``_<sub>`` inserted in front of the
    extension. The file receives ``head`` first and then every item of
    ``lines``.

    Returns ``sub + 1``, the number to use for the next chunk.
    """
    stem, suffix = os.path.splitext(srcName)
    target = '%s_%s%s' % (stem, sub, suffix)
    print('make file: %s' % target)
    with open(target, 'w') as out:
        out.write(head)
        out.writelines(lines)
    return sub + 1
|
||
|
||
def splitByLineCount(filename, count):
    """Split ``filename`` into numbered files of ``count`` lines each.

    The first line of the file is treated as a header and passed to
    ``mkSubFile`` with every chunk. The last chunk may hold fewer than
    ``count`` lines; no file is produced when nothing follows the header.
    """
    with open(filename, 'r') as src:
        head = src.readline()
        chunk = []
        index = 1
        for text_line in src:
            chunk.append(text_line)
            if len(chunk) == count:
                index = mkSubFile(chunk, head, filename, index)
                chunk = []
        # Flush whatever is left over after the last full chunk.
        if chunk:
            index = mkSubFile(chunk, head, filename, index)
|
||
|
||
def init():
    """Set the starting widget states: path entry locked, file-picker button enabled."""
    entryNum.configure(state="disable")
    btnConfirm.configure(state="normal")
|
||
|
||
def confirm():
    """Open a file-selection dialog and store its result in the path field."""
    inputFilePath.set(filedialog.askopenfilename())
|
||
|
||
def clear():
    """Reset both input fields (file path and line count) to empty strings."""
    for field in (inputFilePath, row):
        field.set("")
|
||
|
||
def startSplitFile():
    """Validate the form and split the selected file.

    A warning dialog is shown, and nothing else happens, when no file path
    is set or when the line count is not a positive integer. Otherwise the
    file is split with ``splitByLineCount`` and the elapsed time is printed.
    """
    path = inputFilePath.get()
    if len(path) == 0:
        showwarning(title="警告",
                    message="未选择文件路径!")
        return
    try:
        # Parse once and reuse the value below.
        count = int(row.get())
    except ValueError:
        # Only a failed conversion means "not an integer"; a bare ``except``
        # would hide unrelated errors behind this message.
        showwarning(title="警告",
                    message="输入的不是整数!")
        return
    if count <= 0:
        showwarning(title="警告",
                    message="输入的不是正整数!")
        return
    begin = time.time()
    splitByLineCount(path, count)
    end = time.time()
    print('time is %d seconds ' % (end - begin))
|
||
|
||
def closeWindow():
    """Handler for the window-close request: destroy the root window."""
    root.destroy()
|
||
|
||
# Script entry point: build the single-window GUI and start the Tk event loop.
# The widgets and variables created here are module-level names that the
# handler functions above read directly.
if __name__ == '__main__':
    root = tk.Tk()
    root.title("File Split")
    root.resizable(False, False)
    # 600x100 window placed at screen offset (480, 320).
    root.geometry("600x100+480+320")

    # Row 1: label, path entry and file-picker button.
    mess = tk.Label(root, text="请选择要切分的文件:")
    mess.place(x=20, y=10, width=200, height=20)

    inputFilePath = tk.StringVar(root, value='')
    entryNum = tk.Entry(root, width=80, textvariable=inputFilePath)
    entryNum.place(x=220, y=10, width=260, height=20)

    btnConfirm = tk.Button(root, text='选择文件', command=confirm)
    btnConfirm.place(x=500, y=10, width=70, height=20)

    # Row 2: label and entry for the number of lines per output file.
    mess1 = tk.Label(root, text="请输入切分的文件行数:")
    mess1.place(x=20, y=40, width=200, height=20)

    row = tk.StringVar(root, value='')
    entryNum1 = tk.Entry(root, width=80, textvariable=row)
    entryNum1.place(x=220, y=40, width=200, height=20)

    # Row 3: action buttons. Despite its name, btnStart is the "clear" button;
    # btnSet is the one that starts the split.
    btnStart = tk.Button(root, text='清空', command=clear)
    btnStart.place(x=260, y=70, width=70, height=20)

    btnSet = tk.Button(root, text='开始切分', command=startSplitFile)
    btnSet.place(x=125, y=70, width=70, height=20)

    init()
    # Route the window-manager close request through closeWindow.
    root.protocol("WM_DELETE_WINDOW", closeWindow)
    root.mainloop()
``` | ||
|