tech-pkg archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: checksum argmax fix



Ok, I think I have a general fix. I initially didn't think this was possible, but then found the obscure ::=str option in bmake which is the only way to make this work, as the result of mktemp needs to both be lazily evaluated, and be shared across shell sessions.

With the attached patch I'm able to both create and verify distinfo on NetBSD with an older make, as long as I set CHECKSUM_LIMIT to a value above ARG_MAX:

  $ cd wip/grafana

  $ make CHECKSUM_LIMIT=265000 distinfo
  => Bootstrap dependency digest>=20211023: found digest-20220214
  => distinfo: unchanged.

  $ make CHECKSUM_LIMIT=265000 checksum | tail
  => Checksum BLAKE2s OK for sourcegraph.com_sourcegraph_appdash_@v_v0.0.0-20190731080439-ebfcffb1b5c0.zip
  => Checksum SHA512 OK for sourcegraph.com_sourcegraph_appdash_@v_v0.0.0-20190731080439-ebfcffb1b5c0.zip
  [...]
  $ echo $?
  0

Will need a bunch more testing, but I'm hopeful that this approach is reasonable and minimises any impact. It's not ideal that we're duplicating 99% of the targets, but I'm not sure there's any good way around that.

--
Jonathan Perkin   -   mnx.io   -   pkgsrc.smartos.org
Open Source Complete Cloud   www.tritondatacenter.com
diff --git a/mk/checksum/checksum.awk b/mk/checksum/checksum.awk
index 7bdd141af47c..b1dfe1dcc8c5 100755
--- a/mk/checksum/checksum.awk
+++ b/mk/checksum/checksum.awk
@@ -47,9 +47,11 @@ BEGIN {
 	# Retain output compatible with previous "checksum" shell script
 	progname = "checksum"
 
+	arg_count = 0
 	only_alg = ""
 	distinfo = ""
 	exitcode = 0
+	input = ""
 	patch = 0
 	suffix = ""
 
@@ -57,6 +59,11 @@ BEGIN {
 		opt = ARGV[arg]
 		if (opt == "-a") {
 			only_alg = ARGV[++arg]
+		} else if (opt == "-I") {
+			infile = ARGV[++arg]
+			while (getline < infile) {
+				arg_list[arg_count++] = $0
+			}
 		} else if (opt == "-p") {
 			patch = 1
 		} else if (opt == "-s") {
@@ -93,7 +100,11 @@ BEGIN {
 	# in patch mode (-p).
 	#
 	while (arg < ARGC) {
-		distfile = ARGV[arg++]
+		arg_list[arg_count++] = ARGV[arg++]
+	}
+	i = 0
+	while (i < arg_count) {
+		distfile = arg_list[i++]
 		sfile = distfile
 		if (suffix) {
 			sfile = strip_suffix(sfile)
@@ -167,12 +178,30 @@ BEGIN {
 			continue
 		}
 
+		#
+		# Due to command line argument limits we sometimes need to
+		# split commands up.  Store a count for each algorithm, which
+		# also serves as a way to iterate over all of the algorithms
+		# we've encountered.
+		#
+		if (!batch[algorithm]) {
+			batch[algorithm] = 0
+		}
+
 		#
 		# If not a patch file, then we're handling a distfile, where we
 		# want to build a list of input files to digest(1) so they can
 		# all be calculated in one go.
 		#
-		distsums[algorithm] = sprintf("%s %s", distsums[algorithm],
+		# Increase the batch number if over 64K.  This is well below the
+		# limits seen in the wild (e.g. NetBSD at 256K), but there are
+		# very minimal improvements above this threshold in testing.
+		#
+		b = batch[algorithm]
+		if (length(distsums[algorithm,b]) > 65536) {
+			batch[algorithm] = ++b
+		}
+		distsums[algorithm,b] = sprintf("%s %s", distsums[algorithm,b],
 		    distfiles[distfile])
 	}
 	close(distinfo)
@@ -182,23 +211,26 @@ BEGIN {
 	# pass them all to a single digest(1) command and parse the checksums
 	# to be compared against distinfo.
 	#
-	for (algorithm in distsums) {
-		cmd = sprintf("%s %s %s", DIGEST, algorithm,
-		    distsums[algorithm])
-		while ((cmd | getline) > 0) {
-			# Should be unnecessary, but just in case.  If we want
-			# to be really paranoid then test that $1 == algorithm.
-			if (NF != 4) {
-				continue
-			}
-			# strip "(filename)" -> "filename"
-			distfile = substr($2, 2, length($2) - 2)
-			if (suffix) {
-				distfile = strip_suffix(distfile)
+	for (algorithm in batch) {
+		for (b = 0; b <= batch[algorithm]; b++) {
+			cmd = sprintf("%s %s %s", DIGEST, algorithm,
+			    distsums[algorithm,b])
+			while ((cmd | getline) > 0) {
+				# Should be unnecessary, but just in case.  If
+				# we want to be really paranoid then test that
+				# $1 == algorithm.
+				if (NF != 4) {
+					continue
+				}
+				# strip "(filename)" -> "filename"
+				distfile = substr($2, 2, length($2) - 2)
+				if (suffix) {
+					distfile = strip_suffix(distfile)
+				}
+				checksums[$1, distfile] = $4
 			}
-			checksums[$1, distfile] = $4
+			close(cmd)
 		}
-		close(cmd)
 	}
 
 	#
diff --git a/mk/checksum/checksum.mk b/mk/checksum/checksum.mk
index 7cafe84dac68..4a4f106a9366 100644
--- a/mk/checksum/checksum.mk
+++ b/mk/checksum/checksum.mk
@@ -25,22 +25,64 @@ _PATCH_DIGEST_ALGORITHMS?=	SHA1
 #
 _COOKIE.checksum=	${_COOKIE.extract}
 
+#
+# Some packages are now hitting ARG_MAX limits as they require thousands of
+# distfiles.  Older versions of bmake cannot handle this, and so the only
+# option is to use temporary files.  Packages can define CHECKSUM_LIMIT to
+# an integer containing a minimum ARG_MAX limit required.  In practise this
+# doesn't need to be exact, just at least one more than a common limit.
+#
+_DISTFILES_USE_TMPFILE=	no
+_DISTFILES_INPUT=	/dev/stdin
+#
+.if defined(CHECKSUM_LIMIT)
+CHECKSUM_ARGMAX_cmd=	getconf ARG_MAX || ${ECHO} 0
+.  if ${CHECKSUM_ARGMAX_cmd:sh} < ${CHECKSUM_LIMIT:U0}
+_DISTFILES_USE_TMPFILE=	yes
+_CHECKSUM_TMPFILE_cmd=	mktemp /tmp/checksum.XXXXXXXX
+USE_TOOLS+=		mktemp
+.  endif
+.endif
+
+.if defined(TOOLS_PLATFORM.mktool)
+_CHECKSUM_CMD=	${TOOLS_PLATFORM.mktool} checksum
+.else
 _CHECKSUM_CMD=								\
 	${PKGSRC_SETENV}						\
 	    DIGEST=${TOOLS_DIGEST:Q} SED=${TOOLS_CMDLINE_SED:Q}		\
 	    ${AWK} -f ${PKGSRCDIR}/mk/checksum/checksum.awk --
-
+.endif
 
 .if defined(NO_CHECKSUM) || empty(_CKSUMFILES)
 checksum checksum-phase:
 	@${DO_NADA}
 .else
 checksum checksum-phase:
+.  if ${_DISTFILES_USE_TMPFILE} == yes
+	${_DISTFILES_INPUT::=${_CHECKSUM_TMPFILE_cmd:sh}}
+.    for file in ${_CKSUMFILES}
+	@${ECHO} ${file} >> ${_DISTFILES_INPUT}
+.    endfor
+	${RUN} set -e;							\
+	case ${.TARGET:Q} in						\
+	*-phase)	${TEST} ! -f ${_COOKIE.checksum} || exit 0 ;;	\
+	esac;								\
+	if cd ${DISTDIR} && ${_CHECKSUM_CMD} -I ${_DISTFILES_INPUT} ${DISTINFO_FILE:tA}; then \
+		${TRUE};						\
+	else								\
+		${ERROR_MSG} "Make sure the Makefile and checksum file (${DISTINFO_FILE})"; \
+		${ERROR_MSG} "are up to date.  If you want to override this check, type"; \
+		${ERROR_MSG} "\"${MAKE} NO_CHECKSUM=yes [other args]\"."; \
+		exit 1;							\
+	fi
+.  else
 	${RUN} set -e;							\
 	case ${.TARGET:Q} in						\
 	*-phase)	${TEST} ! -f ${_COOKIE.checksum} || exit 0 ;;	\
 	esac;								\
-	if cd ${DISTDIR} && ${_CHECKSUM_CMD} ${DISTINFO_FILE:tA} ${_CKSUMFILES}; then \
+	cd ${DISTDIR};							\
+	if { ${_CKSUMFILES:@f@ ${ECHO} ${f};@} }			\
+	    | ${_CHECKSUM_CMD} -I ${_DISTFILES_INPUT} ${DISTINFO_FILE:tA}; then \
 		${TRUE};						\
 	else								\
 		${ERROR_MSG} "Make sure the Makefile and checksum file (${DISTINFO_FILE})"; \
@@ -48,11 +90,16 @@ checksum checksum-phase:
 		${ERROR_MSG} "\"${MAKE} NO_CHECKSUM=yes [other args]\"."; \
 		exit 1;							\
 	fi
+.  endif
 .endif
 
+.if defined(TOOLS_PLATFORM.mktool)
+_DISTINFO_CMD=	${TOOLS_PLATFORM.mktool} distinfo
+.else
 _DISTINFO_CMD=	${PKGSRC_SETENV} DIGEST=${TOOLS_DIGEST:Q} SED=${TOOLS_SED:Q} \
 			TEST=${TOOLS_TEST:Q} WC=${TOOLS_WC:Q}	\
 		${AWK} -f ${PKGSRCDIR}/mk/checksum/distinfo.awk --
+.endif
 
 .if exists(${DISTDIR})
 _DISTINFO_ARGS_COMMON+=	-d ${DISTDIR}
@@ -70,16 +117,30 @@ _DISTINFO_ARGS_COMMON+=	${_PATCH_DIGEST_ALGORITHMS:S/^/-p /}
 _DISTINFO_ARGS_PATCHSUM+=	${PATCHDIR}/patch-*
 _DISTINFO_ARGS_PATCHSUM+=	${PATCHDIR}/emul-*-patch-*
 
-_DISTINFO_INPUTFILE=		${DISTINFO_FILE}.filelist
-
 distinfo:
-.for file in ${_CKSUMFILES}
-	@${ECHO} ${file} >> ${_DISTINFO_INPUTFILE}
-.endfor
+.  if ${_DISTFILES_USE_TMPFILE} == yes
+	${_DISTFILES_INPUT::=${_CHECKSUM_TMPFILE_cmd:sh}}
+.    for file in ${_CKSUMFILES}
+	@${ECHO} ${file} >> ${_DISTFILES_INPUT}
+.    endfor
 	${RUN}set -e;							\
 	newfile=${DISTINFO_FILE}.$$$$;					\
 	if ${_DISTINFO_CMD} ${_DISTINFO_ARGS_COMMON}			\
-		-I ${_DISTINFO_INPUTFILE} ${_DISTINFO_ARGS_PATCHSUM} > $$newfile;				\
+		-I ${_DISTFILES_INPUT} ${_DISTINFO_ARGS_PATCHSUM} > $$newfile; \
+	then								\
+		${RM} -f $$newfile;					\
+		${ECHO_MSG} "=> distinfo: unchanged.";			\
+	else								\
+		${RM} -f ${DISTINFO_FILE};				\
+		${MV} -f $$newfile ${DISTINFO_FILE};			\
+	fi
+	@${RM} -f ${_DISTFILES_INPUT}
+.  else
+	${RUN}set -e;							\
+	newfile=${DISTINFO_FILE}.$$$$;					\
+	if { ${_CKSUMFILES:@f@ ${ECHO} ${f};@} }	\
+	    | ${_DISTINFO_CMD} ${_DISTINFO_ARGS_COMMON}			\
+		-I ${_DISTFILES_INPUT} ${_DISTINFO_ARGS_PATCHSUM} > $$newfile; \
 	then								\
 		${RM} -f $$newfile;					\
 		${ECHO_MSG} "=> distinfo: unchanged.";			\
@@ -87,16 +148,32 @@ distinfo:
 		${RM} -f ${DISTINFO_FILE};				\
 		${MV} -f $$newfile ${DISTINFO_FILE};			\
 	fi
-	@rm ${_DISTINFO_INPUTFILE}
+.  endif
 
 makesum:
-.for file in ${_CKSUMFILES}
-	@${ECHO} ${file} >> ${_DISTINFO_INPUTFILE}
-.endfor
+.  if ${_DISTFILES_USE_TMPFILE} == yes
+	${_DISTFILES_INPUT::=${_CHECKSUM_TMPFILE_cmd:sh}}
+.    for file in ${_CKSUMFILES}
+	@${ECHO} ${file} >> ${_DISTFILES_INPUT}
+.    endfor
 	${RUN}set -e;							\
 	newfile=${DISTINFO_FILE}.$$$$;					\
 	if ${_DISTINFO_CMD} ${_DISTINFO_ARGS_COMMON}			\
-		-I ${_DISTINFO_INPUTFILE} > $$newfile;			\
+		-I ${_DISTFILES_INPUT} > $$newfile;			\
+	then								\
+		${RM} -f $$newfile;					\
+		${ECHO_MSG} "=> distinfo: distfiles part unchanged.";	\
+	else								\
+		${RM} -f ${DISTINFO_FILE};				\
+		${MV} -f $$newfile ${DISTINFO_FILE};			\
+	fi
+	@${RM} -f ${_DISTFILES_INPUT}
+.  else
+	${RUN}set -e;							\
+	newfile=${DISTINFO_FILE}.$$$$;					\
+	if { ${_CKSUMFILES:@f@ ${ECHO} ${f};@} }			\
+	    | ${_DISTINFO_CMD} ${_DISTINFO_ARGS_COMMON}			\
+		-I ${_DISTFILES_INPUT} > $$newfile;			\
 	then								\
 		${RM} -f $$newfile;					\
 		${ECHO_MSG} "=> distinfo: distfiles part unchanged.";	\
@@ -104,7 +181,7 @@ makesum:
 		${RM} -f ${DISTINFO_FILE};				\
 		${MV} -f $$newfile ${DISTINFO_FILE};			\
 	fi
-	@rm ${_DISTINFO_INPUTFILE}
+.  endif
 
 makepatchsum:
 	${RUN}set -e;							\


Home | Main Index | Thread Index | Old Index