Daily Notes - 2021-11-12

Git

git pull: force-pull and overwrite local code

git fetch --all
git reset --hard origin/master
git pull

Serve files for download with Python

python -m SimpleHTTPServer 8111
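SimpleHTTPServer is the Python 2 module name; on Python 3 the equivalent (same port assumed) is:

python3 -m http.server 8111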

Specify the Hadoop user

export HADOOP_USER_NAME=xxxx

Reading flame graphs

https://www.cnblogs.com/tcicy/p/8491899.html

GC log analysis tool

https://gceasy.io/gc-index.jsp

Remote debugging

-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=8001
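The flag goes on the JVM command line of the process to debug; a minimal sketch with a hypothetical app.jar, after which an IDE debugger can attach to port 8001:

java -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=8001 -jar app.jar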

Two forms of CASE WHEN

https://blog.csdn.net/qq_36501591/article/details/104005083
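The linked post covers the two forms; a minimal sketch of both, assuming a hypothetical table t with a column score:

-- simple CASE: compare one expression against fixed values
select case score when 100 then 'full' when 0 then 'zero' else 'other' end from t;
-- searched CASE: each branch is an independent condition
select case when score >= 60 then 'pass' else 'fail' end from t;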

Scala build (Maven)

mvn clean scala:compile compile package assembly:assembly

Spark remote submit and debugging

https://blog.csdn.net/yiluohan0307/article/details/80048765

https://mp.weixin.qq.com/s/Rwz5uAI-TfnTBpppsMTfBg
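To debug the driver of a submitted job, one option is to pass the JDWP flag from the remote-debugging note above through spark.driver.extraJavaOptions; a sketch, with the class and jar as placeholders:

spark-submit --class com.example.App \
--conf spark.driver.extraJavaOptions=-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=8001 \
--master yarn \
app.jar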

Setting HDFS space quotas

hdfs dfs -put hdfs://cluster08/tmp/lx/
hdfs dfsadmin -setSpaceQuota 1KB hdfs://cluster08/tmp/lx/
hdfs dfsadmin -clrSpaceQuota hdfs://cluster08/tmp/lx/
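hdfs dfs -count -q shows the current quotas and usage for a directory (name quota, remaining name quota, space quota, remaining space quota, then the usual counts):

hdfs dfs -count -q hdfs://cluster08/tmp/lx/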

NoSuchMethodError: org.apache.hadoop.io.retry.RetryUtils.getDefaultRetryPolicy

<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-hdfs</artifactId>
  <version>2.8.1</version>
</dependency>

Common Hive table-creation statements

-- create a partitioned table
create table dp.dept_part(
deptno int,
dname string,
loc string
)
partitioned by (dt string)
row format delimited fields terminated by '\t';

create table dp.name_list(name string) location 'hdfs://normal//hive_warehouse/_dp.db/name_list';
insert into table dp.name_list values('金融');

create table default.dept_part(deptno int,dname string,loc string) partitioned by (dt string) row format delimited fields terminated by '\t';
insert into table dp.dept_part partition(dt='2021-08-01') values(1,'金融','1F');
insert overwrite table dp.dept_part partition(dt='2021-08-01') values(1,'金融','1F');

alter table dp.dept_part add partition(dt='2021-08-01');

Request to create an HDFS directory

hadoop fs -mkdir -p hdfs://cluster07//external_data/frontend_ai
hadoop fs -chown -R _frontend_ai:hdfs hdfs://cluster07//external_data/frontend_ai

Deleting a block pool

nohup rm -rf /home/test/hard_disk/0/dfs/dn/current/BP-1203969992-10.208.50.21-1450855658517 > 1450855658517_0.out &

SSD migration (storage policies)

hdfs storagepolicies -setStoragePolicy -path ${partition} -policy ${policy}
hdfs storagepolicies -getStoragePolicy -path hdfs://hotdata//hive_warehouse/cube.db/dm_sup_thematic_tool_goods_code
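To see which storage policies the cluster offers:

hdfs storagepolicies -listPolicies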

Linux date arithmetic

date -d "1000 +30 minute" +"%H%M"
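With GNU date, "1000" is parsed as the time 10:00 and "+30 minute" adds half an hour, so this prints 1030.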

Hive table permissions

GRANT ALL on table dmt.all_hotdata_rw_count to user
SHOW GRANT USER u_dmt ON table dmt.all_hotdata_rw_count
SHOW GRANT user mllib ON DATABASE default;

GRANT SELECT on table dmt.all_hotdata_rw_count to role public;
GRANT SELECT on table dmt.remove_tempdb_noprt_tbl_list_new to role public;
GRANT SELECT on table dmt.remove_tempdb_noprt_tbl_list_all to role public;

revoke all on table dmt.all_hotdata_rw_count from user public;

spark-submit examples

spark-submit  --class org.apache.spark.sql.TestSubmitApp \
--conf spark.eventLog.dir=hdfs://normal/tmp/spark3 \
--master yarn \
/home/hdfs/xiang66.li/ec-1.0-SNAPSHOT-jar-with-dependencies.jar

spark-submit --class org.apache.spark.sql.EcAndFileCombine \
--conf spark.eventLog.dir=hdfs://normal/tmp/spark3 \
--master yarn \
/home/hdfs/xiang66.li/ec-1.0-SNAPSHOT-jar-with-dependencies.jar 123456 654321 03_colddata_ec_or_combine_list root.basic_platform.online 0 8 4 true desc true normal '' false true 20000 false true false false false

Excel tips

=INDEX($A$1:$A$30,MATCH(L1,$B$1:$B$30,0),1)
Looks up the key in cell L1 within B1:B30 (keys) and returns the matching value from A1:A30 (values).

Hadoop build

Storage (HDFS)
export JAVA_HOME=/usr/share/java
tar xvf build-tools/cmake-3.8.2-Linux-x86_64.tar.gz
export PATH=$PWD/cmake-3.8.2-Linux-x86_64/bin:$PATH
cmake -version


# build
mvn package -Pdist,src,native -Drequire.snappy -Dbundle.snappy -Dsnappy.lib=/usr/lib64 -DskipTests -Dtar -Dcontainer-executor.conf.dir=/etc/hadoop

ts=$(date +%Y%m%d)
commit_id=${sourcerevision:0:7}
cd hadoop-dist/target/

# get spark yarn shuffle jar
wget -O hadoop-3.1.3/share/hadoop/yarn/spark-2.1.0-1.0.0-yarn-shuffle.jar \
http://tools.test.com/api/release/download/spark-yarn-shuffle/2.1.0-1.0.0/spark-2.1.0-1.0.0-yarn-shuffle.jar

sudo chown -R hdfs:hdfs hadoop-3.1.3

# cgroup
sudo chown root:yarn hadoop-3.1.3/bin/container-executor
sudo chmod 6050 hadoop-3.1.3/bin/container-executor

# rename
sudo mv hadoop-3.1.3 hadoop-3.1.3-${buildbranch}-${ts}-${commit_id}-os7
sudo tar -zcf hadoop-3.1.3-${buildbranch}-${ts}-${commit_id}-os7.tar.gz hadoop-3.1.3-${buildbranch}-${ts}-${commit_id}-os7
YARN
# check
ls /usr/share/

# ui
sudo mkdir -p ~/.m2/repository/com/github/eirslett/yarn/0.21.3
sudo cp -f /usr/share/yarn-0.21.3.tar.gz ~/.m2/repository/com/github/eirslett/yarn/0.21.3/

sudo mkdir -p ~/.m2/repository/com/github/eirslett/node/5.12.0/
sudo cp -f /usr/share/node-v5.12.0-linux-x64.tar.gz ~/.m2/repository/com/github/eirslett/node/5.12.0/node-5.12.0-linux-x64.tar.gz

# speed up git
git ls-remote --tags --heads https://github.com/DataTables/DataTables.git
git config --global url."https://".insteadOf git://

# build
mvn package -Pdist,src,native -Drequire.snappy -Dbundle.snappy -Dsnappy.lib=/usr/lib64 -Dzstd.lib=/usr/local/lib -Dbundle.zstd=true -DskipTests -Dtar -Dcontainer-executor.conf.dir=/etc/hadoop


HADOOP_HOME=$PWD
echo $HADOOP_HOME
find . -name "*.so"

ls -lt $HADOOP_HOME/hadoop-hdfs-project/hadoop-hdfs-native-client/target/target/usr/local/lib/
cp /home/jenkins/workspace/yarn-centos7/hadoop-hdfs-project/hadoop-hdfs-native-client/target/target/usr/local/lib/libhdfs* /home/jenkins/workspace/yarn-centos7/hadoop-dist/target/hadoop-3.2.0/lib/native/

ts=$(date +%Y%m%d)
commit_id=${sourcerevision:0:7}
cd hadoop-dist/target/

# get spark yarn shuffle jar
wget -O hadoop-3.2.0/share/hadoop/yarn/spark-3.0.1-SNAPSHOT-yarn-shuffle.jar \
http://tools.com/nexus/content/repositories/snapshots/com/test/spark-shuffle/3.0.1-SNAPSHOT/spark-shuffle-3.0.1-20210106.025509-1.jar

sudo chown -R hdfs:hdfs hadoop-3.2.0

# cgroup
sudo chown root:yarn hadoop-3.2.0/bin/container-executor
sudo chmod 6050 hadoop-3.2.0/bin/container-executor

# rename
sudo mv hadoop-3.2.0 hadoop-3.2.0-${buildbranch}-${ts}-${commit_id}-os7
sudo tar -zcf hadoop-3.2.0-${buildbranch}-${ts}-${commit_id}-os7.tar.gz hadoop-3.2.0-${buildbranch}-${ts}-${commit_id}-os7

Spark build

mypwd=`echo $PWD`
ls /usr/share/

wget https://www.python.org/ftp/python/3.6.1/Python-3.6.1.tgz
#cd build
tar -zxvf Python-3.6.1.tgz
cd Python-3.6.1
sudo mkdir -p /usr/local/python3
sudo ./configure --prefix=/usr/local/python3
sudo make
sudo make install
sudo ln -s /usr/local/python3/bin/python3 /usr/bin/python3
cd ../..

# ui
sudo mkdir -p ~/.m2/repository/com/github/eirslett/yarn/0.21.3
sudo cp -f /usr/share/yarn-0.21.3.tar.gz ~/.m2/repository/com/github/eirslett/yarn/0.21.3/

sudo mkdir -p ~/.m2/repository/com/github/eirslett/node/5.12.0/
sudo cp -f /usr/share/node-v5.12.0-linux-x64.tar.gz ~/.m2/repository/com/github/eirslett/node/5.12.0/node-5.12.0-linux-x64.tar.gz

#git ls-remote --tags --heads https://github.com/DataTables/DataTables.git
#git config --global url."https://".insteadOf git://

# build
#./build/mvn -Pyarn -Phive -Phive-thriftserver -Psparkr -DskipTests=true -Dmaven.compiler.source=1.8 -Dmaven.compiler.target=1.8
#./build/mvn -Pyarn -Phive -Phive-thriftserver -DskipTests=true -Dmaven.compiler.source=1.8 -Dmaven.compiler.target=1.8
ts=`date "+%Y-%m-%d-%H-%M-%S"`
cd `echo $mypwd`
pwd
./dev/make-distribution.sh --pip --tgz --name ${ts}-${buildbranch} --mvn /usr/share/maven/bin/mvn -Pyarn -Phive -Phive-thriftserver -DskipTests=true -Dmaven.compiler.source=1.8 -Dmaven.compiler.target=1.8

Hadoop NameNode failover

hdfs haadmin -transitionToActive --forcemanual nn1    # force nn1 to Active
hdfs haadmin -transitionToStandby --forcemanual nn2   # force nn2 to Standby
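To check the current state of a NameNode before or after a manual transition:

hdfs haadmin -getServiceState nn1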

Check a file's replication factor

bin/hadoop fs -stat "%o %r" /liangly/teradata/part-00099
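In the -stat format string, %o prints the block size and %r the replication factor of the file.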

Erasure coding (EC) commands

hdfs ec -setPolicy -path ${ec_dt_location} -policy 'RS-6-3-1024k'
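Related EC commands for inspecting what is already set (Hadoop 3.x):

hdfs ec -getPolicy -path ${ec_dt_location}
hdfs ec -listPolicies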
<!-- add by lx-->
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-common</artifactId>
  <version>3.2.0</version>
  <exclusions>
    <exclusion>
      <groupId>com.fasterxml.jackson.module</groupId>
      <artifactId>*</artifactId>
    </exclusion>
    <exclusion>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>*</artifactId>
    </exclusion>
  </exclusions>
</dependency>

<!-- add by lx-->
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-yarn_2.12</artifactId>
  <version>3.0.1</version>
</dependency>

Summary of network startup errors (Failed to start LSB: Bring up/down networking)

Link: https://blog.51cto.com/u_11863547/1905929

Proxifier download and usage

Link: https://www.macdo.cn/17452.html

An algorithm problem

Given a list of groups, output every string formed by taking one element from each group (a Cartesian product):

{{a,b},{c},{d,e}} => acd ace bcd bce
package com.lx.code;

import java.util.Stack;

/**
 * @author lx
 * @date 2021/10/17 4:55 PM
 */
public class JIETI {
    // a[i] holds the candidates of group i; unused slots stay 0
    public static int[][] a = new int[10][10];
    public static Stack<Integer> stack = new Stack<>();

    // depth-first search: pick one element from each group in turn
    public static void dfs(int i, int len) {
        if (i == len) {
            // one element chosen from every group: print the combination
            System.out.println(stack);
        } else {
            for (int j = 0; a[i][j] != 0; j++) {
                stack.push(a[i][j]);
                dfs(i + 1, len);
                stack.pop();
            }
        }
    }

    public static void main(String[] args) {
        // groups {1,2}, {3}, {4,5} -> prints [1, 3, 4] [1, 3, 5] [2, 3, 4] [2, 3, 5]
        a[0][0] = 1;
        a[0][1] = 2;
        a[1][0] = 3;
        a[2][0] = 4;
        a[2][1] = 5;
        dfs(0, 3);
    }
}