Author: Zhang Hua  Published: 2023-03-10
Copyright notice: this article may be freely reposted; when reposting, please clearly credit the original source and author with a hyperlink and keep this copyright notice.

Problem

I have a NUC (minipc) with this configuration:

  • CPU: i7-13700H, 14 cores / 20 threads
  • RAM: 16G+32G=48G
  • Disk: 512G NVMe

I want to use this single machine as a local test bed for all kinds of cloud experiments, ideally openstack over openstack:

  • the underlying openstack is created by devstack or microstack and provides the VMs
  • the upper openstack is created by juju on the VMs provided by the underlying openstack

Alternatively, the underlying layer can be maas providing the VMs, with the upper openstack again created by juju on top of them. Condensed, the devstack-based flow looks as sketched below.
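
A rough outline of the devstack-based flow; every command here is developed in detail in the sections that follow:

# 1, underlying openstack on the bare host
cd devstack && ./stack.sh   # using the local.conf referenced below
# 2, register it as a juju cloud and bootstrap a controller on it
juju add-cloud --client devstack -f ./devstack.yaml
juju bootstrap devstack devstack
# 3, deploy the upper openstack as VMs on the underlying one
./generate-bundle.sh --name ovn -s jammy --ovn --use-stable-charms --run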

Devstack creates the underlying openstack

Create the devstack environment using the local.conf from an earlier post (https://blog.csdn.net/quqi99/article/details/97622336). Then a few more things need to be done:
1, disable quotas

# disable quotas for project, neutron and nova
PROJECT_ID=$(openstack project show --domain default admin -f value -c id)
openstack quota set --instances -1 ${PROJECT_ID}
openstack quota set --floating-ips -1 ${PROJECT_ID}
openstack quota set --cores -1 ${PROJECT_ID}
openstack quota set --ram -1 ${PROJECT_ID}
openstack quota set --gigabytes -1 ${PROJECT_ID}
openstack quota set --volumes -1 ${PROJECT_ID}
openstack quota set --secgroups -1 ${PROJECT_ID}
openstack quota set --secgroup-rules -1 ${PROJECT_ID}
neutron quota-update --network -1
neutron quota-update --floatingip -1
neutron quota-update --port -1
neutron quota-update --router -1
neutron quota-update --security-group -1
neutron quota-update --security-group-rule -1
neutron quota-update --subnet -1
openstack quota show

2, In /etc/nova/nova-cpu.conf, increase allocation_ratio so that VM creation does not start failing after only a few VMs

sudo vim /etc/nova/nova-cpu.conf
[DEFAULT]
cpu_allocation_ratio = 32.0
ram_allocation_ratio = 3.0
disk_allocation_ratio = 20.0
#disable the check for 'While synchronizing instance power states, found 9 instances in the database and 10 instances on the hypervisor'
instance_sync_interval = -1
sudo systemctl restart devstack@n-cpu.service

Some debugging commands used while chasing this problem:

openstack hypervisor list
openstack hypervisor show --fit-width minipc
openstack allocation candidate list --fit-width --resource VCPU=2
openstack allocation candidate list --fit-width --resource DISK_GB=5
openstack allocation candidate list --fit-width --resource MEMORY_MB=200000
openstack resource provider inventory list $(openstack resource provider list |grep minipc |awk '{print $2}')
openstack resource provider usage show $(openstack resource provider list |grep minipc |awk '{print $2}')
sudo nova-manage cell_v2 list_hosts
openstack hypervisor show 1 -f shell | grep -E 'memory_mb|vcpus|free'
openstack hypervisor stats show
du -sh /opt/stack/data/nova/instances
sudo journalctl -f --unit devstack@n-cpu.service

3, Disable security groups (SG)

openstack network set --disable-port-security private
openstack network set --disable-port-security public
#PROJECT_ID=$(openstack project show --domain default admin -f value -c id)
#SECGRP_ID=$(openstack security group list --project ${PROJECT_ID} | awk '/default/ {print $2}')
#openstack security group rule list $SECGRP_ID
#openstack security group rule create $SECGRP_ID --protocol icmp --remote-ip 0.0.0.0/0
#openstack security group rule create $SECGRP_ID --protocol tcp --dst-port 22
#openstack security group rule create $SECGRP_ID --protocol tcp --dst-port 5000

The commands to create an image and set up a keypair:

sudo systemctl enable $(find /etc/systemd/system -name 'devstack@*.service' -print)
source accrc/admin/admin
wget https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-amd64-disk-kvm.img
openstack image create jammy --disk-format qcow2 --file /bak/images/iso/jammy-server-cloudimg-amd64-disk-kvm.img --public
ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa_devstack
openstack keypair create --public-key ~/.ssh/id_rsa_devstack.pub mykey

VM_NAME=i1
openstack server create --wait --image jammy --flavor m1.small --key-name mykey --network=$(openstack network show private -c id -f value) ${VM_NAME}
fixed_ip=$(openstack server show "${VM_NAME}" -caddresses -fjson |jq -r '.addresses.private[0]')
public_network=$(openstack network show public -f value -c id)
fip=$(openstack floating ip create $public_network -f value -c floating_ip_address)
openstack floating ip set $fip --fixed-ip-address $fixed_ip --port $(openstack port list --fixed-ip ip-address="${fixed_ip}" -c id -f value)
sleep 2 && ssh -i ~/.ssh/id_rsa_devstack ubuntu@$fip -v

Microstack provides the underlying openstack

The microstack ussuri version has a metadata issue that left the created VMs impossible to log in to. After modifying the image with libguestfs-tools I could get into the VMs, but because of the metadata issue every VM is named ubuntu, so it cannot serve as the underlying openstack. In the end we therefore used devstack for the underlying openstack.

sudo snap install microstack --classic --channel=rocky/edge
sudo snap refresh --devmode microstack
sudo snap set system snapshots.automatic.retention=no
#sudo microstack init --auto --control
sudo microstack.init --auto  #for rocky
wget https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-amd64-disk-kvm.img
#we must copy image file to /var/snap/microstack/common/images to avoid 'No such file or directory'
sudo cp ./jammy-server-cloudimg-amd64-disk-kvm.img /var/snap/microstack/common/images/jammy-server-cloudimg-amd64.img
sudo cp /bak/images/iso/focal-server-cloudimg-amd64-disk-kvm.img /var/snap/microstack/common/images/
microstack.openstack image create jammy --disk-format qcow2 --file /var/snap/microstack/common/images/jammy-server-cloudimg-amd64.img --public
#IMAGE_ID=$(curl http://cloud-images.ubuntu.com/focal/current/focal-server-cloudimg-amd64.img | microstack.openstack image create --public --container-format=bare --disk-format=qcow2 -f value -c id focal)
microstack.openstack image list
rm -rf /home/hua/snap/microstack/common/.ssh/id_*
cp ~/.ssh/id_rsa /home/hua/snap/microstack/common/.ssh/id_microstack
cp ~/.ssh/id_rsa.pub /home/hua/snap/microstack/common/.ssh/id_microstack.pub
microstack.openstack keypair create --public-key /home/hua/snap/microstack/common/.ssh/id_microstack.pub microstack
microstack launch jammy -n i1 -f m1.small

#it always has 'Permission denied' when using 'microstack launch' to boot vm, so use this
cat << EOF | sudo tee /var/snap/microstack/common/user-data
#cloud-config
user: ubuntu
password: password
chpasswd: { expire: False }
EOF
microstack.openstack server create --wait --image jammy --flavor m1.small --key-name microstack \
  --network=$(microstack.openstack network show test -c id -f value) \
  --user-data /var/snap/microstack/common/user-data --config-drive true i2
public_network=$(microstack.openstack network show external -f value -c id)
fip=$(microstack.openstack floating ip create $public_network -f value -c floating_ip_address)
microstack.openstack floating ip set $fip --fixed-ip-address 192.168.222.11 --port $(microstack.openstack port list --fixed-ip ip-address=192.168.222.11 -c id -f value)

Modify the VM image to allow password login. But since VMs created by microstack suffer from the metadata problem, even with password login enabled by editing the image, things such as the hostname still cannot be set via metadata.

#modify image to include default password and dns=192.168.99.1
sudo apt-get install libguestfs-tools -y
#sudo guestmount --rw -a /bak/images/iso/jammy-server-cloudimg-amd64-disk-kvm.img -i /mnt
#sudo chroot /mnt ls /
#sudo guestunmount /mnt
$ sudo guestfish --rw -a /bak/images/iso/jammy-server-cloudimg-amd64-disk-kvm.img
><fs> run
 100%
><fs> list-filesystems
/dev/sda1: ext4
/dev/sda14: unknown
/dev/sda15: vfat
><fs> mount /dev/sda1 /
><fs> vi /etc/cloud/cloud.cfg
disable_root: false
ssh_pwauth: true
system_info:
   ...
   default_user:
     name: ubuntu
     lock_passwd: false
     plain_text_passwd: "password"
><fs> vi /etc/ssh/sshd_config
PasswordAuthentication yes
><fs> quit
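
Alternatively, virt-customize from the same libguestfs-tools package can script such edits non-interactively; a minimal sketch, assuming setting the password and the sshd option is all that is needed:

#set the default user's password and enable password login inside the image
sudo virt-customize -a /bak/images/iso/jammy-server-cloudimg-amd64-disk-kvm.img \
  --password ubuntu:password:password \
  --run-command 'sed -i "s/^#\?PasswordAuthentication.*/PasswordAuthentication yes/" /etc/ssh/sshd_config'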

Continue by configuring juju to use the microstack openstack:

microstack.openstack server list
chmod 400 /home/hua/snap/microstack/common/.ssh/id_rsa
#ssh-add /home/hua/snap/microstack/common/.ssh/id_rsa
#but it doesn't work
ssh -o "PubkeyAcceptedKeyTypes +ssh-rsa" -i /home/hua/snap/microstack/common/.ssh/id_rsa ubuntu@10.20.20.162 -v
#access https://10.20.20.1:443
#ssh -N -L 8001:10.20.20.1:443 hua@minipc.lan  #then on t440p access https://127.0.0.1:8001
#sudo snap get microstack config.credentials.keystone-password
sudo snap disable microstack && sudo snap enable microstack
sudo ln -s /var/snap/microstack/common/etc/microstack.rc ~/novarc
source ~/novarc

#https://microstack.run/docs/using-juju
mkdir ~/simplestreams
OS_SERIES=jammy
OS_REGION=microstack
IMAGE_ID=$(openstack image show jammy -fvalue -cid)
juju metadata generate-image -d ~/simplestreams -i $IMAGE_ID -s $OS_SERIES -r $OS_REGION -u $OS_AUTH_URL
#add another image in the future
#juju metadata generate-image -d ~/simplestreams -i $(openstack image show focal -fvalue -cid) -s focal -r RegionOne -u $OS_AUTH_URL
#juju metadata add-image --series focal $(openstack image show focal -fvalue -cid)
#juju metadata list-images
#besides image-metadata-url, tools-metadata-url can also be set - https://github.com/cloudbase/juju-core/blob/master/doc/simplestreams-metadata.txt
#but lxd does not support metadata
#$ juju metadata validate-images
#ERROR lxd provider does not support image metadata validation

cat /var/snap/microstack/common/etc/ssl/certs/cacert.pem
vim microstack.yaml
clouds:
  microstack:
    type: openstack
    auth-types: [access-key,userpass]
    regions:
      microstack:
        endpoint: https://192.168.99.179:5000/v3
    ca-certificates:
      - |
        -----BEGIN CERTIFICATE-----
        xxx
        -----END CERTIFICATE-----
#juju add-cloud --client microstack -f ./microstack.yaml
juju update-cloud --client microstack -f ./microstack.yaml
juju autoload-credentials
juju credentials --show-secrets --format yaml
juju bootstrap --bootstrap-series=$OS_SERIES --metadata-source=~/simplestreams \
   --model-default network=test --model-default external-network=external \
   --model-default use-default-secgroup=false \
   --bootstrap-constraints allocate-public-ip=true microstack microstack
openstack server list

juju deploy kubernetes-core
juju run-action --wait kubernetes-worker/0 microbot replicas=2
mkdir ~/.kube
juju scp kubernetes-master/0:config ~/.kube/config
sudo snap install kubectl --classic

juju over devstack openstack

Configure juju to integrate with this openstack:

sudo ln -s /bak/openstack/devstack/accrc/admin/admin  ~/novarc
sudo sed -i 's/localhost/minipc.lan/g' ~/novarc
source ~/novarc
cat << EOF |tee devstack.yaml
clouds:
  devstack:
    type: openstack
    auth-types: [userpass]
    endpoint: http://minipc.lan:5000/v3/
    regions:
      RegionOne:
        endpoint: http://minipc.lan:5000/v3/
credentials:
  devstack:
    admin:
      auth-type: userpass
      tenant-name: admin
      user-domain-name: default
      project-domain-name: default
      username: admin
      password: password
EOF
juju update-cloud --local devstack -f ./devstack.yaml
juju update-credential --local devstack -f ./devstack.yaml
#juju add-cloud --local devstack -f ./devstack.yaml
#juju add-credential --local devstack -f ./devstack.yaml
cat ~/.local/share/juju/clouds.yaml
rm -rf ~/simplestreams && mkdir -p ~/simplestreams
OS_SERIES=jammy
OS_REGION=RegionOne
IMAGE_ID=$(openstack image show jammy -fvalue -cid)
juju metadata generate-image -d ~/simplestreams -i $IMAGE_ID -s $OS_SERIES -r $OS_REGION -u $OS_AUTH_URL
openstack network set --disable-port-security private
openstack network set --disable-port-security public
juju bootstrap --bootstrap-series=jammy --metadata-source=~/simplestreams \
  --model-default network=private --model-default external-network=public \
  --config use-default-secgroup=false --bootstrap-constraints allocate-public-ip=true \
  --debug devstack devstack
declare -a model_defaults=( 'use-default-secgroup=false'
                           )
juju model-defaults "${model_defaults[@]}"
#--config logging-config="<root>=TRACE;unit=TRACE"
sudo virsh console <xx>

When using this setup it is best to also configure a squid cache; see https://blog.csdn.net/quqi99/article/details/104278572 for the squid configuration.

juju model-defaults apt-http-proxy=http://minipc.lan:3128 apt-https-proxy=http://minipc.lan:3128
juju model-defaults juju-http-proxy=http://minipc.lan:3128 juju-https-proxy=http://minipc.lan:3128
juju model-defaults juju-no-proxy=127.0.0.1,localhost,::1,192.168.0.0/16,10.0.0.0/8,172.16.0.0/16
juju model-defaults no-proxy=127.0.0.1,localhost,::1,192.168.0.0/16,10.0.0.0/8,172.16.0.0/16
juju model-defaults snap-http-proxy=http://minipc.lan:3128 snap-https-proxy=http://minipc.lan:3128
#cannot specify both legacy proxy values and juju proxy values
juju model-defaults --reset http-proxy
juju model-defaults --reset https-proxy
juju model-defaults |grep proxy

Use it to create k8s:

juju deploy kubernetes-core
juju run-action --wait kubernetes-worker/0 microbot replicas=2
mkdir ~/.kube
juju scp kubernetes-master/0:config ~/.kube/config
sudo snap install kubectl --classic

Or create o7k (openstack):

#juju destroy-model -y --destroy-storage --force --no-wait ovn
./generate-bundle.sh --name ovn -s jammy --ovn --use-stable-charms --run
./generate-bundle.sh --name ovn -s jammy --ovn --use-stable-charms

Finally, since our VM network 10.0.0.0/26 is a geneve tunnel network, the minipc host has no IP in that subnet and therefore cannot reach the upper openstack VMs. The routes below relay traffic through the juju controller's FIP:

sudo ip route add 10.0.0.0/26 via <juju-controller-FIP>
sudo iptables -A FORWARD -s 192.168.101.0/24 -d 10.0.0.0/26 -j ACCEPT  #on juju-controller (the FORWARD chain lives in the filter table, not nat)

Note: a strange problem showed up here. With this route added, ping works but ssh does not (kex_exchange_identification: read: Connection reset by peer), while ssh from a bastion VM in the same subnet works fine. One possible explanation is that the juju-controller FIP is not actually configured on the juju-controller itself; it is merely a FIP for it (it should live on the GW). Whatever the reason, the following alternative workaround solved the problem:

sshuttle -D -r ubuntu@192.168.101.79 10.0.0.0/16

Apart from that, the most important remaining problem is that openstack VMs sometimes shut down by themselves. I tried pinning one VM onto pCPUs 18 and 19.

sudo virsh emulatorpin instance-00000002
sudo virsh vcpuinfo instance-00000002
sudo virsh vcpupin instance-00000002
sudo virsh vcpupin instance-00000002 0 18 --config --live
sudo virsh vcpupin instance-00000002 1 19 --config --live

I also tried setting the following parameters in /etc/nova/nova-cpu.conf; neither works.

instance_sync_interval = -1
sync_power_state_interval = -1

It may simply be that this NUC lacks the CPU resources to run as many as 17 VMs. I also tuned grub and the cpu governor to squeeze more performance out of it:

GRUB_CMDLINE_LINUX_DEFAULT="mitigations=off nohpet nokaslr"

sudo apt install cpufrequtils sysfsutils linux-tools-generic linux-tools-$(uname -r) linux-cloud-tools-$(uname -r) -y
sudo cpupower -c all frequency-set -g performance
cat /proc/cpuinfo | grep "MHz"

Finally, CPU pinning was also applied to reduce the auto-shutoff problem:

#!/bin/bash
# Round-robin pin every vCPU of each running VM onto pCPUs 1..19
# (pCPU 0 is deliberately left free for the host).
NUM_CPUS=20
PINNED_CPUS=1
for VMID in $(virsh list --name --state-running); do
  NUM_VCPUS=$(virsh vcpucount ${VMID} --live --active)
  echo $VMID has $NUM_VCPUS vCPUs
  for VCPU in $(seq 0 $(($NUM_VCPUS - 1))); do
    if [ $PINNED_CPUS -eq $NUM_CPUS ]; then
      PINNED_CPUS=1
    fi
    echo virsh vcpupin $VMID $VCPU $PINNED_CPUS
    sudo virsh vcpupin $VMID $VCPU $PINNED_CPUS --config --live
    PINNED_CPUS=$((PINNED_CPUS + 1))
  done
done

But all of this only mitigates the shutoff problem caused by resource starvation; it does not eliminate it. To really get rid of the shutoffs, the bundle template has to be changed (git diff configure novarc openstack.yaml.template overlays/neutron-ovn.yaml overlays/mysql-innodb-cluster.yaml).
Define some machines in openstack.yaml.template:

machines:
  '0':
    constraints: mem=4G
  '1':
    constraints: mem=4G
  '2':
    constraints: mem=4G
  '3':
    constraints: mem=4G
  '4':
    constraints: mem=2G

Then use the to: parameter to place mysql and ovn-central onto machines 0, 1 and 2, nova-compute onto 3, and neutron-api onto 4 (it gets a machine of its own because it is error-prone). Spread the remaining services in openstack.yaml.template across 0, 1 and 2 (if left unplaced they may all default to machine 0); any service not mentioned keeps its own dedicated VM. A sketch of such placements follows.
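
A minimal sketch of what the placements could look like in openstack.yaml.template; the application names and unit counts are illustrative, not the complete bundle:

applications:
  mysql-innodb-cluster:
    num_units: 3
    to: ['0', '1', '2']
  ovn-central:
    num_units: 3
    to: ['0', '1', '2']
  nova-compute:
    num_units: 1
    to: ['3']
  neutron-api:
    num_units: 1
    to: ['4']
  keystone:
    num_units: 1
    to: ['lxd:0']   # lxd:N places a unit in a container on machine N
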
Some charm issues that are easy to hit:
1, The neutron-api charm errors with "neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugins/ml2/ml2_conf.ini upgrade head"

pass=`juju run --unit mysql/leader 'leader-get mysql-password'`
juju ssh mysql/leader -- mysql -uroot -p<xxx>
drop database neutron;
create database neutron;
sudo neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugins/ml2/ml2_conf.ini upgrade head

Update 2023-07-12 - For this problem it is actually best not to add the relation (['mysql:certificates', 'vault:certificates']) at all; otherwise vault depends on mysql: while mysql is not ready vault is not ready, and while vault is not ready many other charms are not ready either, such as neutron-api here. If the relation already exists it can be removed, as shown below.
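
A sketch of removing the relation, assuming the applications are named mysql and vault as in this bundle:

juju remove-relation mysql:certificates vault:certificates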

2, nova-compute/0 fails to come up; the logs show nova-ovn-metadata reporting: 2023-04-07 18:25:09.688 4218 ERROR ovsdbapp.backend.ovs_idl ... ovsdbapp.backend.ovs_idl.idlutils.RowNotFound: Cannot find Chassis_Private with name=juju-bfeea0-ovn-0.openstackgate.local. A reinstall fixed this.
3, nova-compute/0 fails to come up; the log shows: vaultlocker encrypt --uuid 3ec2d3c5-d5e5-4bef-8f46-7cb8a8476c5d /dev/vdb

juju config nova-compute encrypt=false

4, glance limits the total size of uploaded images, 1G by default; once the images already uploaded add up to more than 1G, no further image can be created.

openstack registered limit list
openstack registered limit set --service glance --default-limit 5000 --region RegionOne 57539850f0a7425abe43f306eb80a369
You are not authorized to perform the requested action: identity:update_registered_limit.

# updating registered limits is guarded by keystone policy; the default rule is:
oslopolicy-policy-generator --namespace keystone | grep update_registered_limit
identity:update_registered_limit: rule:admin_required
# override it in the policy file referenced by [oslo_policy] in keystone.conf
vim /etc/keystone/keystone.conf
[oslo_policy]
policy_file = /etc/keystone/policy.json

5, Running openstack commands reports CERTIFICATE_VERIFY_FAILED; adding export OS_CACERT=/etc/ssl/certs/ fixes it. The cause is that running devstack installed python3-pip and pulled in the certifi package via pip3; see https://blog.csdn.net/quqi99/article/details/107182847
6, If devstack was set up without 'LIBVIRT_CPU_MODE=host-passthrough', nested KVM can be enabled manually (see below), or a <cpu mode='host-passthrough'/> element can be added via virsh edit

vim /etc/nova/nova-cpu.conf
[libvirt]
live_migration_uri = qemu+ssh://hua@%s/system
#cpu_model = Nehalem
#cpu_mode = custom
cpu_mode = host-passthrough
virt_type = kvm
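
To enable nested KVM manually on the Intel host, the standard procedure (not specific to this setup) is:

#Y or 1 means nested virtualization is already enabled
cat /sys/module/kvm_intel/parameters/nested
echo 'options kvm_intel nested=1' | sudo tee /etc/modprobe.d/kvm_intel.conf
sudo modprobe -r kvm_intel && sudo modprobe kvm_intel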

7, VM creation then kept failing because neutron-server.log had grown to 3.4G. Change debug=False in the template, delete and recreate the log (chown -R neutron:neutron /var/log/neutron/neutron-server.log), and finally restart neutron-server; see the sketch below.
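
In a running charm deployment the same can be done without regenerating the bundle; a sketch, assuming the application is named neutron-api (debug is a standard option of this charm) and the last two commands run on the neutron-api unit:

juju config neutron-api debug=false
sudo truncate -s 0 /var/log/neutron/neutron-server.log
sudo systemctl restart neutron-server
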
8, Not setting ml2-dns makes systemd-resolved fail to start inside the booted VMs

juju config neutron-api-plugin-ovn dns-servers=192.168.99.1

9, Add a devstack profile (git diff configure novarc openstack.yaml.template overlays/neutron-ovn.yaml overlays/mysql-innodb-cluster.yaml). Remember to add export OS_CACERT=/etc/ssl/certs/ to novarc, and set debug=False in the template so the neutron-api log doesn't grow past 3G

$ cat profiles/devstack 
#!/bin/bash -ex
# devstack overrides
[[ -z "$NAMESERVER" ]] && export NAMESERVER="192.168.99.1"
[[ -z "$GATEWAY" ]] && export GATEWAY="10.0.0.1"
[[ -z "$CIDR_EXT" ]] && export CIDR_EXT="10.0.0.0/26"
[[ -z "$FIP_RANGE" ]] && export FIP_RANGE="10.0.0.55:10.0.0.62"
#[[ -z "$SWIFT_IP" ]] && export SWIFT_IP="10.230.19.58"
./profiles/default $@

Unified network design

This machine will not run only devstack + juju. What if we also want to run lxd experiments, or maas + openstack experiments?
Below is the openstack network layout and its physical realization on the minipc:

  • The maas network uses 10.230.16.0/21, without dhcp, with SNAT applied (iptables -t nat -A POSTROUTING -s 10.230.16.0/21 ! -d 10.230.16.0/21 -j MASQUERADE && netfilter-persistent save). maas will later configure dhcp on it for PXE, so it ends up with DHCP after all. The 10.230.16.0/21 network serves as eth0, the first NIC of every VM. The range is subdivided into four parts: for the VMs (10.230.17.1 - 10.230.18.254 and 10.230.21.1 - 10.230.23.254); for maas enlisting/commissioning (10.230.16.6 - 10.230.16.255); for OpenStack VIPs (10.230.19.1 - 10.230.19.255); reserved for the maas nodes (10.230.16.1 - 10.230.16.5)
  • The second NIC of each VM comes from br-ext (10.230.64.0/21), without dhcp. It backs br-data, an ovs bridge that neutron/ovn uses for the east-west and north-south traffic of provider networks (non-tunnel networks). So inside a VM only the first NIC, eth0, needs its IP set via netplan; leave eth1 alone.
  • Take special care: since br-maas eventually gets dhcp via maas, it must be in a different fabric than the dhcp-enabled physical segment 192.168.99.0/24 (for bonding, both NICs must come from the same fabric). Otherwise eth0 inside a VM may get its DHCP lease from either segment at random and sometimes fail to reach the PXE image on 10.230.16.1/21 (likewise, if a VM has several disks, sda and sdb may swap order and also break PXE boot)
  • The physical NIC eth0 can be turned into br-eth0 for lxd containers; the openstack experiments do not actually need this NIC. Also, dns inside the VMs should stay on 10.230.16.1, which provides dns for the maas machines; VMs should not use an external dns name such as node1. Hence, if the maas api endpoint is installed on node1, address it by IP rather than hostname; otherwise 192.168.9.1 cannot resolve node1, the juju controller cannot reach maas on node1, and creating juju machines breaks
##INFRA - br-maas
#Netmask:   255.255.248.0
#Network:   10.230.16.0/21
#HostMin:   10.230.16.1
#HostMax:   10.230.23.254
#Broadcast: 10.230.23.255
# (10.230.16.1 - 10.230.16.5)   - Range reserved for infra/maas
# (10.230.16.6 - 10.230.16.255) - Range for enlisting/commissioning nodes
# (10.230.19.1 - 10.230.19.255) - Range for Openstack VIPs
# (10.230.20.1 - 10.230.20.255) - Range for Infra/Reserved
# (10.230.17.1 - 10.230.18.254) and (10.230.21.1 - 10.230.23.254) - The rest is for infra dhcp
#Hosts/Net: 2046
##OPENSTACK - br-ext
#Netmask:   255.255.248.0
#Network:   10.230.64.0/21
#HostMin:   10.230.64.1
#HostMax:   10.230.71.254
#Broadcast: 10.230.71.255
# (10.230.64.1 - 10.230.64.4)   - Range for Infra/Reserved
# (10.230.65.0 - 10.230.71.254) - Range for Openstack FIPs and ceph replication
#Hosts/Net: 2046
cat << EOF |sudo tee /etc/netplan/01-network-manager-all.yaml
network:
  version: 2
  #renderer: NetworkManager
  renderer: networkd
  ethernets:
    enp2s0:
      dhcp4: no
      dhcp6: no
      match:
        macaddress: a0:36:bc:58:2b:ff
      wakeonlan: true
  bridges:
    #one is 192.168.99.0/24 with dhcp, one is 10.230.16.0/21 without dhcp(maas-dns will set dhcp for it)
    br-eth0:
      dhcp4: yes
      dhcp6: no
      interfaces:
      - enp2s0
      #Use 'etherwake a0:36:bc:58:2b:ff' to wol in bridge
      macaddress: a0:36:bc:58:2b:ff
      routes:
        - to: default
          via: 192.168.99.1
      nameservers:
        addresses:
        - 192.168.99.1
    br-maas:
      #iptables -t nat -A POSTROUTING -s 10.230.16.0/21 ! -d 10.230.16.0/21 -j MASQUERADE
      #netfilter-persistent save
      dhcp4: false
      dhcp6: false
      #maas admin vlan update \$FABRIC_ID $VLAN_TAG dhcp_on=True primary_rack=\$PRIMARY_RACK
      addresses:
      - 10.230.16.1/21
      nameservers:
        #maas admin maas set-config name=upstream_dns value=192.168.99.1
        addresses:
        - 10.230.16.1
    br-ext:
      dhcp4: false
      dhcp6: false
      addresses:
      - 10.230.64.1/21
EOF
sudo systemctl enable systemd-networkd.service && sudo systemctl start systemd-networkd.service
sudo netplan try
sudo netplan apply
sudo apt install bridge-utils netfilter-persistent -y
sudo iptables -t nat -A POSTROUTING -s 10.230.16.0/21 ! -d 10.230.16.0/21 -j MASQUERADE
sudo netfilter-persistent save
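
Once maas manages this subnet, the address plan above can be registered as IP ranges; a sketch, assuming the maas admin profile is logged in and the 10.230.16.0/21 subnet already exists in maas:

SUBNET=$(maas admin subnets read | jq -r '.[] | select(.cidr=="10.230.16.0/21") | .id')
#keep the infra/maas hosts out of dhcp
maas admin ipranges create type=reserved start_ip=10.230.16.1 end_ip=10.230.16.5 subnet=$SUBNET
#pool for enlisting/commissioning
maas admin ipranges create type=dynamic start_ip=10.230.16.6 end_ip=10.230.16.255 subnet=$SUBNET
#keep the OpenStack VIP range out of dhcp
maas admin ipranges create type=reserved start_ip=10.230.19.1 end_ip=10.230.19.255 subnet=$SUBNET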

If a second machine later wants to join the experiments, just create a veth-maas (peered with br-eth0) and attach it to br-maas (a geneve tunnel between the two machines would also work):

sudo ip link add veth-maas type veth peer name veth-eth0
sudo brctl addif br-maas veth-maas
sudo brctl addif br-eth0 veth-eth0
sudo ip link set dev veth-maas up
sudo ip link set dev veth-eth0 up
#sudo ip l del veth-eth0 master br-eth0
sudo sysctl -w net.ipv4.ip_forward=1

As an aside, a drawback of the netplan configuration is that running post-script hooks is inconvenient. The following workaround using networkd-dispatcher hooks is untested.

ubuntu 22.04 netplan hooks: automatically adding preset iptables rules when a NIC comes up - https://blog.csdn.net/kq425/article/details/125631320
sudo apt install networkd-dispatcher
cat << EOF |sudo tee /etc/networkd-dispatcher/routable.d/50-ifup-hooks
#!/bin/sh
for d in up post-up; do
    hookdir=/etc/network/if-\${d}.d
    [ -e \$hookdir ] && /bin/run-parts \$hookdir
done
exit 0
EOF
cat << EOF |sudo tee /etc/networkd-dispatcher/off.d/50-ifdown-hooks
#!/bin/sh
for d in down post-down; do
    hookdir=/etc/network/if-\${d}.d
    [ -e \$hookdir ] && /bin/run-parts \$hookdir
done
exit 0
EOF
sudo chmod +x /etc/networkd-dispatcher/routable.d/50-ifup-hooks
sudo chmod +x /etc/networkd-dispatcher/off.d/50-ifdown-hooks
cat << EOF |sudo tee /etc/network/if-up.d/connect-br-maas-to-br-eth0
#!/bin/bash
ip l add name veth-eth0 type veth peer name veth-maas
ip l set dev veth-eth0 up
ip l set dev veth-maas up
ip l set veth-eth0 master br-eth0
ip l set veth-maas master br-maas
EOF
cat << EOF |sudo tee /etc/network/if-down.d/delete-br-maas-to-br-eth0
#!/bin/bash
ip l del veth-eth0 master br-eth0
EOF
sudo chmod +x /etc/network/if-up.d/connect-br-maas-to-br-eth0
sudo chmod +x /etc/network/if-down.d/delete-br-maas-to-br-eth0
/bin/run-parts /etc/network/if-up.d/ --test
/bin/run-parts /etc/network/if-down.d/ --test

For reference, an ovs-bridge can be created directly as follows, though our design has no need for one:

auto br-eth0
allow-ovs br-eth0
iface br-eth0 inet static
pre-up /usr/bin/ovs-vsctl -- --may-exist add-br br-eth0
pre-up /usr/bin/ovs-vsctl -- --may-exist add-port br-eth0 eno1
  address 192.168.99.125
  gateway 192.168.99.1
  network 192.168.99.0
  netmask 255.255.255.0
  broadcast 192.168.99.255
ovs_type OVSBridge
ovs_ports eno1

#sudo ip -6 addr add 2001:2:3:4500:fa32:e4ff:febe:87cd/64 dev br-eth0
iface br-eth0 inet6 static
pre-up modprobe ipv6
address 2001:2:3:4500:fa32:e4ff:febe:87cd
netmask 64
gateway 2001:2:3:4500::1

auto eno1
allow-br-eth0 eno1
iface eno1 inet manual
ovs_bridge br-eth0
ovs_type OVSPort

Using ifupdown (/etc/network/interfaces) instead of netplan makes it easy to support post-script hooks:

root@node1:~# cat /etc/network/interfaces
auto lo
iface lo inet loopback
auto eth0
iface eth0 inet manual
auto br-eth0
iface br-eth0 inet static
    address 192.168.99.124/24
    gateway 192.168.99.1
    bridge_ports eth0
    dns-nameservers 192.168.99.1
    bridge_stp on
    bridge_fd 0
    bridge_maxwait 0
    up echo -n 0 > /sys/devices/virtual/net/$IFACE/bridge/multicast_snooping
# for stateless it's 'inet6 auto', for stateful it's 'inet6 dhcp'
iface br-eth0 inet6 auto
    #iface eth0 inet6 static
    #address 2001:192:168:99::135                                                                                            
    #gateway 2001:192:168:99::1
    #netmask 64
    # use SLAAC to get global IPv6 address from the router
    # we may not enable ipv6 forwarding, otherwise SLAAC gets disabled
    # sleep 5 is due a bug and 'dhcp 1' indicates that info should be obtained from dhcpv6 server for stateless
    up echo 0 > /proc/sys/net/ipv6/conf/$IFACE/disable_ipv6
    up sleep 5
    autoconf 1
    accept_ra 2
    dhcp 1

To make the two bridges created above, br-eth0 and br-maas, usable by KVM, two corresponding virsh networks must also be defined:

cat << EOF | sudo tee /tmp/bridge.xml
<network>
    <name>br-maas</name>
    <forward mode="bridge" />
    <bridge name="br-maas" />
</network>
EOF
sudo virsh net-define /tmp/bridge.xml
sudo virsh net-start br-maas
sudo virsh net-autostart br-maas
sudo virsh net-list
cat << EOF | sudo tee /tmp/bridge.xml
<network>
    <name>br-eth0</name>
    <forward mode="bridge" />
    <bridge name="br-eth0" />
</network>
EOF
sudo virsh net-define /tmp/bridge.xml
sudo virsh net-start br-eth0
sudo virsh net-autostart br-eth0
sudo virsh net-list
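
A test VM can then reference these networks by name, for example (a sketch; the VM name, disk path, sizes and os-variant are placeholders):

sudo virt-install --name testvm --vcpus 2 --ram 2048 \
  --disk path=/var/lib/libvirt/images/testvm.qcow2,size=20 \
  --network network=br-maas --network network=br-ext \
  --boot network,hd --os-variant ubuntu22.04 --noautoconsole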

Also, iptables rules sit at L3. With net.bridge.bridge-nf-call-iptables=1, traffic leaving a VM passes through L3 before L2, so it can be filtered by L3 iptables before it ever reaches the L2 flows (traffic entering a VM goes through L2 first and then L3, so it is unaffected). To avoid this, apply the following settings:

cat << EOF | sudo tee -a /etc/sysctl.conf
net.bridge.bridge-nf-call-ip6tables = 0
net.bridge.bridge-nf-call-iptables = 0
net.bridge.bridge-nf-call-arptables = 0
EOF
sudo sysctl -p /etc/sysctl.conf
cat << EOF | sudo tee -a /etc/modules-load.d/br_netfilter.conf
br_netfilter
EOF
sudo modprobe br_netfilter

apt cache

Note: maas cannot use our customized apt-cacher-ng apt mirror. On a Chinese network, maas is best left with its default built-in maas-proxy: maas-proxy runs squid on port 8000, and curtin writes /etc/apt/apt.conf.d/90curtin-aptproxy inside each maas machine so that it uses this cache. When maas deploy kept failing with curtin errors, the cause turned out to be that the built-in maas-proxy was not being used and a Chinese ubuntu mirror was used instead.

ubuntu@n1:~$ cat /etc/apt/apt.conf.d/90curtin-aptproxy 
Acquire::http::Proxy "http://192.168.9.1:8000/";
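
To verify that the built-in proxy is actually in effect, something like the following can be used (enable_http_proxy and http_proxy are standard maas settings; an empty http_proxy means the built-in maas-proxy is used):

maas admin maas get-config name=enable_http_proxy
maas admin maas get-config name=http_proxy
#revert to the built-in maas-proxy
maas admin maas set-config name=enable_http_proxy value=true
maas admin maas set-config name=http_proxy value=''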

Setting up an apt cache with apt-cacher-ng works as follows:

sudo apt install apt-cacher-ng -y
echo 'PassThroughPattern: .*' |sudo tee -a /etc/apt-cacher-ng/acng.conf
sudo systemctl restart apt-cacher-ng.service && sudo systemctl enable apt-cacher-ng.service
du -sh /var/cache/apt-cacher-ng/
#vim /var/lib/dpkg/info/apt-cacher-ng.postinst
#dpkg --configure apt-cacher-ng

#change the dir from /var/cache/apt-cacher-ng/ to /mnt/udisk/apt-cacher-ng
cat << EOF |sudo tee -a /etc/fstab
#use blkid to see uuid
UUID="d63d7251-ec3d-4ef5-aa92-f3d4c480f20c" /mnt/udisk   ext4    defaults    0  2
EOF
sudo mkfs.ext4 -F -L udisk /dev/sdb1
sudo mkdir -p /mnt/udisk/apt-cacher-ng
sudo chown -R apt-cacher-ng:apt-cacher-ng /mnt/udisk/apt-cacher-ng
sudo sed -i 's/CacheDir: \/var\/cache\/apt-cacher-ng/CacheDir: \/mnt\/udisk\/apt-cacher-ng/g' /etc/apt-cacher-ng/acng.conf
du -sh /mnt/udisk/apt-cacher-ng

#Use apt cache proxy
echo 'Acquire::http::Proxy "http://proxy:3142";' | sudo tee /etc/apt/apt.conf.d/01acng
#echo 'Acquire::HTTP::Proxy::ppa.launchpadcontent "DIRECT";' |sudo tee -a /etc/apt/apt.conf.d/01acng

pip mirror

A pip mirror is only needed when running devstack tests.

#use pip mirror, or use this instead: PYPI_ALTERNATIVE_URL=http://mirrors.aliyun.com/pypi/simple
mkdir -p ~/.pip
cat << EOF |tee ~/.pip/pip.conf
[global]
trusted-host=mirrors.aliyun.com
index-url = http://mirrors.aliyun.com/pypi/simple
disable-pip-version-check = true
timeout = 120
EOF

[Optional] image mirror

Note: we do not need an image mirror at all:

  • For http://archive.ubuntu.com/ubuntu, it is best not to configure a Chinese mirror for maas; use the built-in maas-proxy instead. It runs squid on port 8000, and curtin configures /etc/apt/apt.conf.d/90curtin-aptproxy inside each maas machine to use that apt cache. A Chinese mirror, thanks to the peculiarities of the local network, occasionally causes curtin errors and failed machine deploys. Remember this.
  • For the likes of images.maas.io, no image mirror is needed either: once the gateway whitelists them from the proxy, downloads work normally.

Note: sstream-mirror at first sat at 0% progress forever; making sure the IP of images.maas.io is in the whitelist ipset fixes that, so an image mirror really isn't needed. Also note that the IP only shows up in the whitelist after 'ping images.maas.io' makes the router resolve the domain (ipset test whitelist 185.125.190.37)

dig images.maas.io
ipset list |grep Name
ipset test whitelist 185.125.190.37
# iptables-save |grep -i set |grep whitelist
-A SS_SPEC_WAN_AC -m set --match-set whitelist dst -j RETURN
  • archive.ubuntu.com can be replaced with http://mirrors.cloud.tencent.com/ubuntu or https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
  • ports.ubuntu.com can be replaced with http://ports.ubuntu.com/ubuntu-ports

Note: I first used the mirrors above in maas, but during 'juju bootstrap' the error below showed up in 'virsh console', so in the end I removed both mirrors again.

   77.289935] cloud-init[1278]:   Something wicked happened resolving 'mirrors.cloud.tencent.com:http' (-5 - No address associated with hostname)

To mirror images.maas.io and cloud-images.ubuntu.com yourself, proceed as follows:

sudo apt install simplestreams -y
#create images.maas.io mirror
KEYRING_FILE=/usr/share/keyrings/ubuntu-cloudimage-keyring.gpg
IMAGE_SRC=https://images.maas.io/ephemeral-v3/stable
IMAGE_DIR=/images/simplestreams/maas/images/ephemeral-v3/stable
sudo mkdir -p /images/simplestreams/maas/images/ephemeral-v3/stable
sudo sstream-mirror --keyring=$KEYRING_FILE $IMAGE_SRC $IMAGE_DIR 'arch=amd64' 'release~(focal|jammy)' --max=1 --progress
sudo sstream-mirror --keyring=$KEYRING_FILE $IMAGE_SRC $IMAGE_DIR 'os~(grub*|pxelinux)' --max=1 --progress

#create cloud-images.ubuntu.com mirror, for lxd and kvm
workdir=/images/simplestreams/lxdkvm
sudo sstream-mirror --keyring=/usr/share/keyrings/ubuntu-cloudimage-keyring.gpg --progress --max=1 --path=streams/v1/index.json https://cloud-images.ubuntu.com/releases/ $workdir 'arch=amd64' 'release~(focal|jammy)' 'ftype~(lxd.tar.xz|squashfs|root.tar.xz|root.tar.gz|disk1.img|.json|.sjson)'

#create streams.canonical.com mirror, for juju agent
workdir=/images/simplestreams/juju
sudo sstream-mirror --no-verify --progress --max=2 --path=streams/v1/index2.sjson https://streams.canonical.com/juju/tools/ $workdir 'arch=amd64' 'release~(jammy)'
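
For the https://node1.lan/... URLs used below to work, the mirror directories have to be served over HTTPS; a minimal sketch with apache, assuming the node1.lan certificate generated at the end of this section and these file locations:

sudo apt install apache2 -y && sudo a2enmod ssl
cat << EOF | sudo tee /etc/apache2/sites-available/simplestreams.conf
<VirtualHost *:443>
    ServerName node1.lan
    DocumentRoot /images/simplestreams
    SSLEngine on
    SSLCertificateFile /etc/ssl/certs/node1.lan.crt
    SSLCertificateKeyFile /etc/ssl/private/node1.lan.key
    <Directory /images/simplestreams>
        Require all granted
    </Directory>
</VirtualHost>
EOF
sudo a2ensite simplestreams && sudo systemctl reload apache2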

#How to use images.maas.io mirror, remember to import the key to maas first
scp /home/hua/ca/ca.crt root@192.168.99.221:/usr/local/share/ca-certificates/ca.crt
lxc exec maas -- chmod 644 /usr/local/share/ca-certificates/ca.crt
lxc exec maas -- update-ca-certificates --fresh
lxc exec maas -- wget https://node1.lan/maas/images/ephemeral-v3/stable/streams/v1/index.sjson
#NOTE: we should use /snap/maas/current/usr/share/keyrings/ubuntu-cloudimage-keyring.gpg rather than /usr/share/keyrings/ubuntu-cloudimage-keyring.gpg, so don't need to change keyring_filename
apt install jq -y
BOOT_SOURCE_ID=$(maas admin boot-sources read | jq '.[] | select(.url | contains("images.maas.io/ephemeral-v3")) | .id')
maas admin boot-source update $BOOT_SOURCE_ID url=https://node1.lan:443/maas/images/ephemeral-v3/stable/
maas admin boot-resources import

#How to use the cloud-images.ubuntu.com mirror, remember to import the key first as well (NOT TESTED yet)
#import the key HERE
juju model-config container-image-stream=released container-image-metadata-url=https://node1.lan/lxdkvm image-metadata-url=https://node1.lan/lxdkvm
#how to use streams.canonical mirror
juju model-config agent-metadata-url=https://node1.lan/juju agent-stream=released

Then sort out the certificates:

#https://goharbor.io/docs/2.6.0/install-config/configure-https/
openssl genrsa -out ca.key 4096
openssl req -x509 -new -nodes -sha512 -days 3650 -subj "/C=CN/ST=Beijing/L=Beijing/O=example/OU=Personal/CN=node1.lan" -key ca.key -out ca.crt
openssl genrsa -out node1.lan.key 4096
openssl req -sha512 -new -subj "/C=CN/ST=Beijing/L=Beijing/O=example/OU=Personal/CN=node1.lan" -key node1.lan.key -out node1.lan.csr
#complies with the Subject Alternative Name (SAN) and x509 v3 extension requirements to avoid 'x509: certificate relies on legacy Common Name field, use SANs instead'
cat > v3.ext <<-EOF
authorityKeyIdentifier=keyid,issuer
basicConstraints=CA:FALSE
keyUsage = digitalSignature, nonRepudiation, keyEncipherment, dataEncipherment
extendedKeyUsage = serverAuth
subjectAltName = @alt_names

[alt_names]
DNS.1=node1.lan
DNS.2=node1
DNS.3=hostname
EOF
openssl x509 -req -sha512 -days 3650 -extfile v3.ext \
    -CA ca.crt -CAkey ca.key -CAcreateserial \
    -in node1.lan.csr -out node1.lan.crt
