Unverified Commit 6b913e3d authored by Conor Dooley, committed by Palmer Dabbelt
Browse files

RISC-V: rework comments in ISA string parser



I have found these comments to not be at all helpful whenever I look at
the parser. Further, the comments in the default case (single letter
parser) are not quite right either.
Group the comments into a larger one at the start of each case, that
attempts to explain things at a higher level.

Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20230607-headpiece-tannery-83ed5cc4856a@spud


Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
parent 069b0d51
Loading
Loading
Loading
Loading
+59 −11
Original line number Diff line number Diff line
@@ -164,7 +164,7 @@ void __init riscv_fill_hwcap(void)

			switch (*ext) {
			case 's':
				/**
				/*
				 * Workaround for invalid single-letter 's' & 'u'(QEMU).
				 * No need to set the bit in riscv_isa as 's' & 'u' are
				 * not valid ISA extensions. It works until multi-letter
@@ -181,53 +181,101 @@ void __init riscv_fill_hwcap(void)
			case 'X':
			case 'z':
			case 'Z':
				/*
				 * Before attempting to parse the extension itself, we find its end.
				 * As multi-letter extensions must be split from other multi-letter
				 * extensions with an "_", the end of a multi-letter extension will
				 * either be the null character or the "_" at the start of the next
				 * multi-letter extension.
				 *
				 * Next, as the extension's version is currently ignored, we
				 * eliminate that portion. This is done by parsing backwards from
				 * the end of the extension, removing any numbers. This may be a
				 * major or minor number however, so the process is repeated if a
				 * minor number was found.
				 *
				 * ext_end is intended to represent the first character *after* the
				 * name portion of an extension, but will be decremented to the last
				 * character itself while eliminating the extension's version number.
				 * A simple re-increment solves this problem.
				 */
				ext_long = true;
				/* Multi-letter extension must be delimited */
				for (; *isa && *isa != '_'; ++isa)
					if (unlikely(!isalnum(*isa)))
						ext_err = true;
				/* Parse backwards */

				ext_end = isa;
				if (unlikely(ext_err))
					break;

				if (!isdigit(ext_end[-1]))
					break;
				/* Skip the minor version */

				while (isdigit(*--ext_end))
					;
				if (tolower(ext_end[0]) != 'p'
				    || !isdigit(ext_end[-1])) {
					/* Advance it to offset the pre-decrement */

				if (tolower(ext_end[0]) != 'p' || !isdigit(ext_end[-1])) {
					++ext_end;
					break;
				}
				/* Skip the major version */

				while (isdigit(*--ext_end))
					;

				++ext_end;
				break;
			default:
				/*
				 * Things are a little easier for single-letter extensions, as they
				 * are parsed forwards.
				 *
				 * After checking that our starting position is valid, we need to
				 * ensure that, when isa was incremented at the start of the loop,
				 * it arrived at the start of the next extension.
				 *
				 * If we are already on a non-digit, there is nothing to do. Either
				 * we have a multi-letter extension's _, or the start of an
				 * extension.
				 *
				 * Otherwise we have found the current extension's major version
				 * number. Parse past it, and a subsequent p/minor version number
				 * if present. The `p` extension must not appear immediately after
				 * a number, so there is no fear of missing it.
				 *
				 */
				if (unlikely(!isalpha(*ext))) {
					ext_err = true;
					break;
				}
				/* Find next extension */

				if (!isdigit(*isa))
					break;
				/* Skip the minor version */

				while (isdigit(*++isa))
					;

				if (tolower(*isa) != 'p')
					break;

				if (!isdigit(*++isa)) {
					--isa;
					break;
				}
				/* Skip the major version */

				while (isdigit(*++isa))
					;

				break;
			}

			/*
			 * The parser expects that at the start of an iteration isa points to the
			 * character before the start of the next extension. This will not be the
			 * case if we have just parsed a single-letter extension and the next
			 * extension is not a multi-letter extension prefixed with an "_". It is
			 * also not the case at the end of the string, where it will point to the
			 * terminating null character.
			 */
			if (*isa != '_')
				--isa;