#!/bin/bash

# Find duplicate shared libraries by md5 checksum and possible duplicates by size.
# Results will be available in the out directory of the build.
# Usage:
#     ./dupcheck.sh <out_dir> <image>
#
# <out_dir> is searched as ./<out_dir>/<image>/, i.e. relative to the current
# working directory. Two reports are written into <out_dir>:
#     duplicate-libs-md5-<image>.txt
#     duplicate-libs-size-<image>.txt

OUT_DIR="$1"
IMG="$2"
# Scratch files; all of them live in the out directory.
TMP_MD5="${OUT_DIR}/_dup_md5"
TMP_SIZE="${OUT_DIR}/_dup_size"
TMP_CHECK="${OUT_DIR}/_dup_tmp_check"
TMP_SIZE_REAL="${OUT_DIR}/_dup_size_real"
TMP_FILE1="${OUT_DIR}/_dup_f1"
TMP_FILE2="${OUT_DIR}/_dup_f2"
# Final reports.
MD5_DUPLICATES="${OUT_DIR}/duplicate-libs-md5-${IMG}.txt"
SIZE_DUPLICATES="${OUT_DIR}/duplicate-libs-size-${IMG}.txt"

# Check arguments
if [ "$#" -ne 2 ]; then
  echo "Usage: ./dupcheck.sh <out_dir> <image>"
  exit 1
fi

# Check host and toolchain version
CHECK_HOST=$(uname)
if [[ "${CHECK_HOST}" == "Linux" ]]; then
  ARCH="linux-x86"
else
  ARCH="darwin-x86"
fi
BINUTILS_PATH="./prebuilts/clang/host/${ARCH}/llvm-binutils-stable"

# Remove any old reports if they exist, so this run starts from a clean slate
# (both reports are only ever appended to below).
rm -f -- "${MD5_DUPLICATES}" "${SIZE_DUPLICATES}"

# Find all lib*.so files and calculate their md5.
# Each md5sum line is reduced to "<md5> <basename>", identical lines are
# counted, and only groups seen more than once are kept. The count is compared
# numerically: a textual match on "1 " would also discard groups whose checksum
# ends in 1 or whose count is 11, 21, ...
find ./"${OUT_DIR}"/"${IMG}"/ -name "lib*.so" -type f -print0 \
  | xargs -0 md5sum \
  | sed -e "s# .*/# #" \
  | sort \
  | uniq -c \
  | sort -g \
  | awk '$1 > 1 { sub(/^ *[0-9]+ /, ""); print }' \
  > "${TMP_MD5}"

if [ -s "${TMP_MD5}" ]; then
  # Every line of TMP_MD5 is "<md5> <basename>".
  while read -r checksum filename; do
    # For each md5, list the file paths that match.
    {
      echo "MD5: ${checksum}"
      # Anchor the checksum at the start of the md5sum line, then strip the
      # "<md5><separator>" prefix so only the path is left (keeps paths that
      # contain spaces intact).
      find ./"${OUT_DIR}"/"${IMG}"/ -name "${filename}" -type f -print0 \
        | xargs -0 md5sum \
        | grep "^${checksum} " \
        | sed 's/^[^ ]* .//'
      echo ""
    } >> "${MD5_DUPLICATES}"
  done < "${TMP_MD5}"
else
  echo "No duplicate files by md5 found." >> "${MD5_DUPLICATES}"
fi

# Cleanup
rm -f -- "${TMP_MD5}"

# Find possible duplicate .so files by size.
# Every *.so under the image is reduced to "<size> <basename>"; identical lines
# are counted and only groups seen more than once are kept. The count is
# compared numerically: a textual match on "1 " would also discard groups whose
# size ends in 1 or whose count is 11, 21, ...
# NOTE(review): `stat --format` is the GNU coreutils spelling and stat errors
# are discarded here -- confirm the darwin host provides a compatible stat.
find ./"${OUT_DIR}"/"${IMG}"/ -name "*.so" -type f -print0 \
  | xargs -0 stat --format="%s %n" 2>/dev/null \
  | sed -e "s# .*/# #" \
  | sort \
  | uniq -c \
  | sort -g \
  | awk '$1 > 1' \
  > "${TMP_SIZE}"

if [ -s "${TMP_SIZE}" ]; then
  # Every line of TMP_SIZE is "<count> <size> <basename>"; the count is unused.
  while read -r _ size filename; do
    # Collect the paths with this basename whose size is exactly ${size}
    # (compared on the size field only, so 4096 does not match 14096 or a path
    # that happens to contain those digits).
    find ./"${OUT_DIR}"/"${IMG}"/ -name "${filename}" -type f -print0 \
      | xargs -0 stat --format="%s %n" 2>/dev/null \
      | awk -v size="${size}" '$1 == size { sub(/^[0-9]+ /, ""); print }' \
      | sort > "${TMP_CHECK}"

    # Check if the files are not in the md5sum list and do nothing if that is
    # the case. Start from an empty list so leftovers from an aborted run
    # cannot leak into this group.
    : > "${TMP_SIZE_REAL}"
    while IFS= read -r filepath; do
      if ! grep -Fxq -- "${filepath}" "${MD5_DUPLICATES}"; then
        printf '%s\n' "${filepath}" >> "${TMP_SIZE_REAL}"
      fi
    done < "${TMP_CHECK}"

    # For every duplication found, diff the .note and .text sections.
    if [ -s "${TMP_SIZE_REAL}" ]; then
      {
        echo "File: ${filename}, Size: ${size}"
        cat "${TMP_SIZE_REAL}"
        echo ""
      } >> "${SIZE_DUPLICATES}"

      # Read the count from stdin so wc prints only the number; the arithmetic
      # comparison below tolerates any padding wc adds.
      count=$(wc -l < "${TMP_SIZE_REAL}")

      # Limitation: this only works for file pairs. If more than two possible
      # duplications are found, the user needs to check manually all the
      # possible combinations using the llvm-readelf and llvm-objdump commands
      # below.
      if (( count == 2 )); then
        file1=$(head -n 1 "${TMP_SIZE_REAL}")
        file2=$(tail -n 1 "${TMP_SIZE_REAL}")

        # Check .note section
        "${BINUTILS_PATH}"/llvm-readelf --wide --notes "${file1}" > "${TMP_FILE1}" 2>&1
        "${BINUTILS_PATH}"/llvm-readelf --wide --notes "${file2}" > "${TMP_FILE2}" 2>&1
        {
          # Drop the first three lines of the unified diff (file headers and
          # the first hunk marker).
          diff -u "${TMP_FILE1}" "${TMP_FILE2}" | sed "1d;2d;3d"
          echo ""
        } >> "${SIZE_DUPLICATES}"

        # Check .text section
        # The first two lines of each dump are dropped before diffing.
        "${BINUTILS_PATH}"/llvm-objdump --line-numbers --disassemble --demangle --reloc --no-show-raw-insn --section=.text "${file1}" \
          | sed "1d;2d" > "${TMP_FILE1}" 2>&1
        "${BINUTILS_PATH}"/llvm-objdump --line-numbers --disassemble --demangle --reloc --no-show-raw-insn --section=.text "${file2}" \
          | sed "1d;2d" > "${TMP_FILE2}" 2>&1
        {
          diff -u "${TMP_FILE1}" "${TMP_FILE2}" | sed "1d;2d;3d"
          echo ""
        } >> "${SIZE_DUPLICATES}"

        # Cleanup
        rm -f -- "${TMP_FILE1}" "${TMP_FILE2}"
      else
        echo "*Note: more than one duplicate. Manually verify all possible combinations." >> "${SIZE_DUPLICATES}"
      fi
      echo "" >> "${SIZE_DUPLICATES}"
    fi
  done < "${TMP_SIZE}"
else
  echo "No duplicate files by size found." >> "${SIZE_DUPLICATES}"
fi

# Cleanup: runs on both paths, so the (possibly empty) candidate list and the
# per-group scratch files never stay behind in the out directory.
rm -f -- "${TMP_SIZE}" "${TMP_CHECK}" "${TMP_SIZE_REAL}"