################################################################################ ## ## $Revision: 8 $ ## $Author: mhx $ ## $Date: 2005/03/10 18:08:40 +0100 $ ## ################################################################################ ## ## Version 3.x, Copyright (C) 2004-2005, Marcus Holland-Moritz. ## Version 2.x, Copyright (C) 2001, Paul Marquess. ## Version 1.x, Copyright (C) 1999, Kenneth Albanowski. ## ## This program is free software; you can redistribute it and/or ## modify it under the same terms as Perl itself. ## ################################################################################ =provides grok_hex grok_oct grok_bin grok_numeric_radix grok_number __UNDEFINED__ =implementation __UNDEFINED__ IN_PERL_COMPILETIME (PL_curcop == &PL_compiling) __UNDEFINED__ IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE) __UNDEFINED__ IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE) __UNDEFINED__ IN_LOCALE (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME) __UNDEFINED__ IS_NUMBER_IN_UV 0x01 __UNDEFINED__ IS_NUMBER_GREATER_THAN_UV_MAX 0x02 __UNDEFINED__ IS_NUMBER_NOT_INT 0x04 __UNDEFINED__ IS_NUMBER_NEG 0x08 __UNDEFINED__ IS_NUMBER_INFINITY 0x10 __UNDEFINED__ IS_NUMBER_NAN 0x20 /* GROK_NUMERIC_RADIX depends on grok_numeric_radix */ __UNDEFINED__ GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send) __UNDEFINED__ PERL_SCAN_GREATER_THAN_UV_MAX 0x02 __UNDEFINED__ PERL_SCAN_SILENT_ILLDIGIT 0x04 __UNDEFINED__ PERL_SCAN_ALLOW_UNDERSCORES 0x01 __UNDEFINED__ PERL_SCAN_DISALLOW_PREFIX 0x02 #ifndef grok_numeric_radix #if { NEED grok_numeric_radix } bool grok_numeric_radix(pTHX_ const char **sp, const char *send) { #ifdef USE_LOCALE_NUMERIC #ifdef PL_numeric_radix_sv if (PL_numeric_radix_sv && IN_LOCALE) { STRLEN len; char* radix = SvPV(PL_numeric_radix_sv, len); if (*sp + len <= send && memEQ(*sp, radix, len)) { *sp += len; return TRUE; } } #else /* older perls don't have PL_numeric_radix_sv so the radix * must manually be requested from locale.h */ #include dTHR; /* needed for older threaded perls */ struct lconv *lc = localeconv(); char *radix = lc->decimal_point; if (radix && IN_LOCALE) { STRLEN len = strlen(radix); if (*sp + len <= send && memEQ(*sp, radix, len)) { *sp += len; return TRUE; } } #endif /* PERL_VERSION */ #endif /* USE_LOCALE_NUMERIC */ /* always try "." if numeric radix didn't match because * we may have data from different locales mixed */ if (*sp < send && **sp == '.') { ++*sp; return TRUE; } return FALSE; } #endif #endif /* grok_number depends on grok_numeric_radix */ #ifndef grok_number #if { NEED grok_number } int grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) { const char *s = pv; const char *send = pv + len; const UV max_div_10 = UV_MAX / 10; const char max_mod_10 = UV_MAX % 10; int numtype = 0; int sawinf = 0; int sawnan = 0; while (s < send && isSPACE(*s)) s++; if (s == send) { return 0; } else if (*s == '-') { s++; numtype = IS_NUMBER_NEG; } else if (*s == '+') s++; if (s == send) return 0; /* next must be digit or the radix separator or beginning of infinity */ if (isDIGIT(*s)) { /* UVs are at least 32 bits, so the first 9 decimal digits cannot overflow. */ UV value = *s - '0'; /* This construction seems to be more optimiser friendly. (without it gcc does the isDIGIT test and the *s - '0' separately) With it gcc on arm is managing 6 instructions (6 cycles) per digit. In theory the optimiser could deduce how far to unroll the loop before checking for overflow. */ if (++s < send) { int digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { /* Now got 9 digits, so need to check each time for overflow. */ digit = *s - '0'; while (digit >= 0 && digit <= 9 && (value < max_div_10 || (value == max_div_10 && digit <= max_mod_10))) { value = value * 10 + digit; if (++s < send) digit = *s - '0'; else break; } if (digit >= 0 && digit <= 9 && (s < send)) { /* value overflowed. skip the remaining digits, don't worry about setting *valuep. */ do { s++; } while (s < send && isDIGIT(*s)); numtype |= IS_NUMBER_GREATER_THAN_UV_MAX; goto skip_value; } } } } } } } } } } } } } } } } } } numtype |= IS_NUMBER_IN_UV; if (valuep) *valuep = value; skip_value: if (GROK_NUMERIC_RADIX(&s, send)) { numtype |= IS_NUMBER_NOT_INT; while (s < send && isDIGIT(*s)) /* optional digits after the radix */ s++; } } else if (GROK_NUMERIC_RADIX(&s, send)) { numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */ /* no digits before the radix means we need digits after it */ if (s < send && isDIGIT(*s)) { do { s++; } while (s < send && isDIGIT(*s)); if (valuep) { /* integer approximation is valid - it's 0. */ *valuep = 0; } } else return 0; } else if (*s == 'I' || *s == 'i') { s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; if (s == send || (*s != 'F' && *s != 'f')) return 0; s++; if (s < send && (*s == 'I' || *s == 'i')) { s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; if (s == send || (*s != 'I' && *s != 'i')) return 0; s++; if (s == send || (*s != 'T' && *s != 't')) return 0; s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0; s++; } sawinf = 1; } else if (*s == 'N' || *s == 'n') { /* XXX TODO: There are signaling NaNs and quiet NaNs. */ s++; if (s == send || (*s != 'A' && *s != 'a')) return 0; s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; sawnan = 1; } else return 0; if (sawinf) { numtype &= IS_NUMBER_NEG; /* Keep track of sign */ numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; } else if (sawnan) { numtype &= IS_NUMBER_NEG; /* Keep track of sign */ numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; } else if (s < send) { /* we can have an optional exponent part */ if (*s == 'e' || *s == 'E') { /* The only flag we keep is sign. Blow away any "it's UV" */ numtype &= IS_NUMBER_NEG; numtype |= IS_NUMBER_NOT_INT; s++; if (s < send && (*s == '-' || *s == '+')) s++; if (s < send && isDIGIT(*s)) { do { s++; } while (s < send && isDIGIT(*s)); } else return 0; } } while (s < send && isSPACE(*s)) s++; if (s >= send) return numtype; if (len == 10 && memEQ(pv, "0 but true", 10)) { if (valuep) *valuep = 0; return IS_NUMBER_IN_UV; } return 0; } #endif #endif /* * The grok_* routines have been modified to use warn() instead of * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit, * which is why the stack variable has been renamed to 'xdigit'. */ #ifndef grok_bin #if { NEED grok_bin } UV grok_bin(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) { const char *s = start; STRLEN len = *len_p; UV value = 0; NV value_nv = 0; const UV max_div_2 = UV_MAX / 2; bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; bool overflowed = FALSE; if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { /* strip off leading b or 0b. for compatibility silently suffer "b" and "0b" as valid binary numbers. */ if (len >= 1) { if (s[0] == 'b') { s++; len--; } else if (len >= 2 && s[0] == '0' && s[1] == 'b') { s+=2; len-=2; } } } for (; len-- && *s; s++) { char bit = *s; if (bit == '0' || bit == '1') { /* Write it in this wonky order with a goto to attempt to get the compiler to make the common case integer-only loop pretty tight. With gcc seems to be much straighter code than old scan_bin. */ redo: if (!overflowed) { if (value <= max_div_2) { value = (value << 1) | (bit - '0'); continue; } /* Bah. We're just overflowed. */ warn("Integer overflow in binary number"); overflowed = TRUE; value_nv = (NV) value; } value_nv *= 2.0; /* If an NV has not enough bits in its mantissa to * represent a UV this summing of small low-order numbers * is a waste of time (because the NV cannot preserve * the low-order bits anyway): we could just remember when * did we overflow and in the end just multiply value_nv by the * right amount. */ value_nv += (NV)(bit - '0'); continue; } if (bit == '_' && len && allow_underscores && (bit = s[1]) && (bit == '0' || bit == '1')) { --len; ++s; goto redo; } if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) warn("Illegal binary digit '%c' ignored", *s); break; } if ( ( overflowed && value_nv > 4294967295.0) #if UVSIZE > 4 || (!overflowed && value > 0xffffffff ) #endif ) { warn("Binary number > 0b11111111111111111111111111111111 non-portable"); } *len_p = s - start; if (!overflowed) { *flags = 0; return value; } *flags = PERL_SCAN_GREATER_THAN_UV_MAX; if (result) *result = value_nv; return UV_MAX; } #endif #endif #ifndef grok_hex #if { NEED grok_hex } UV grok_hex(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) { const char *s = start; STRLEN len = *len_p; UV value = 0; NV value_nv = 0; const UV max_div_16 = UV_MAX / 16; bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; bool overflowed = FALSE; const char *xdigit; if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { /* strip off leading x or 0x. for compatibility silently suffer "x" and "0x" as valid hex numbers. */ if (len >= 1) { if (s[0] == 'x') { s++; len--; } else if (len >= 2 && s[0] == '0' && s[1] == 'x') { s+=2; len-=2; } } } for (; len-- && *s; s++) { xdigit = strchr((char *) PL_hexdigit, *s); if (xdigit) { /* Write it in this wonky order with a goto to attempt to get the compiler to make the common case integer-only loop pretty tight. With gcc seems to be much straighter code than old scan_hex. */ redo: if (!overflowed) { if (value <= max_div_16) { value = (value << 4) | ((xdigit - PL_hexdigit) & 15); continue; } warn("Integer overflow in hexadecimal number"); overflowed = TRUE; value_nv = (NV) value; } value_nv *= 16.0; /* If an NV has not enough bits in its mantissa to * represent a UV this summing of small low-order numbers * is a waste of time (because the NV cannot preserve * the low-order bits anyway): we could just remember when * did we overflow and in the end just multiply value_nv by the * right amount of 16-tuples. */ value_nv += (NV)((xdigit - PL_hexdigit) & 15); continue; } if (*s == '_' && len && allow_underscores && s[1] && (xdigit = strchr((char *) PL_hexdigit, s[1]))) { --len; ++s; goto redo; } if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) warn("Illegal hexadecimal digit '%c' ignored", *s); break; } if ( ( overflowed && value_nv > 4294967295.0) #if UVSIZE > 4 || (!overflowed && value > 0xffffffff ) #endif ) { warn("Hexadecimal number > 0xffffffff non-portable"); } *len_p = s - start; if (!overflowed) { *flags = 0; return value; } *flags = PERL_SCAN_GREATER_THAN_UV_MAX; if (result) *result = value_nv; return UV_MAX; } #endif #endif #ifndef grok_oct #if { NEED grok_oct } UV grok_oct(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) { const char *s = start; STRLEN len = *len_p; UV value = 0; NV value_nv = 0; const UV max_div_8 = UV_MAX / 8; bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; bool overflowed = FALSE; for (; len-- && *s; s++) { /* gcc 2.95 optimiser not smart enough to figure that this subtraction out front allows slicker code. */ int digit = *s - '0'; if (digit >= 0 && digit <= 7) { /* Write it in this wonky order with a goto to attempt to get the compiler to make the common case integer-only loop pretty tight. */ redo: if (!overflowed) { if (value <= max_div_8) { value = (value << 3) | digit; continue; } /* Bah. We're just overflowed. */ warn("Integer overflow in octal number"); overflowed = TRUE; value_nv = (NV) value; } value_nv *= 8.0; /* If an NV has not enough bits in its mantissa to * represent a UV this summing of small low-order numbers * is a waste of time (because the NV cannot preserve * the low-order bits anyway): we could just remember when * did we overflow and in the end just multiply value_nv by the * right amount of 8-tuples. */ value_nv += (NV)digit; continue; } if (digit == ('_' - '0') && len && allow_underscores && (digit = s[1] - '0') && (digit >= 0 && digit <= 7)) { --len; ++s; goto redo; } /* Allow \octal to work the DWIM way (that is, stop scanning * as soon as non-octal characters are seen, complain only iff * someone seems to want to use the digits eight and nine). */ if (digit == 8 || digit == 9) { if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) warn("Illegal octal digit '%c' ignored", *s); } break; } if ( ( overflowed && value_nv > 4294967295.0) #if UVSIZE > 4 || (!overflowed && value > 0xffffffff ) #endif ) { warn("Octal number > 037777777777 non-portable"); } *len_p = s - start; if (!overflowed) { *flags = 0; return value; } *flags = PERL_SCAN_GREATER_THAN_UV_MAX; if (result) *result = value_nv; return UV_MAX; } #endif #endif =xsinit #define NEED_grok_number #define NEED_grok_numeric_radix #define NEED_grok_bin #define NEED_grok_hex #define NEED_grok_oct =xsubs UV grok_number(string) SV *string PREINIT: const char *pv; STRLEN len; CODE: pv = SvPV(string, len); if (!grok_number(pv, len, &RETVAL)) XSRETURN_UNDEF; OUTPUT: RETVAL UV grok_bin(string) SV *string PREINIT: char *pv; I32 flags; STRLEN len; CODE: pv = SvPV(string, len); RETVAL = grok_bin(pv, &len, &flags, NULL); OUTPUT: RETVAL UV grok_hex(string) SV *string PREINIT: char *pv; I32 flags; STRLEN len; CODE: pv = SvPV(string, len); RETVAL = grok_hex(pv, &len, &flags, NULL); OUTPUT: RETVAL UV grok_oct(string) SV *string PREINIT: char *pv; I32 flags; STRLEN len; CODE: pv = SvPV(string, len); RETVAL = grok_oct(pv, &len, &flags, NULL); OUTPUT: RETVAL UV Perl_grok_number(string) SV *string PREINIT: const char *pv; STRLEN len; CODE: pv = SvPV(string, len); if (!Perl_grok_number(aTHX_ pv, len, &RETVAL)) XSRETURN_UNDEF; OUTPUT: RETVAL UV Perl_grok_bin(string) SV *string PREINIT: char *pv; I32 flags; STRLEN len; CODE: pv = SvPV(string, len); RETVAL = Perl_grok_bin(aTHX_ pv, &len, &flags, NULL); OUTPUT: RETVAL UV Perl_grok_hex(string) SV *string PREINIT: char *pv; I32 flags; STRLEN len; CODE: pv = SvPV(string, len); RETVAL = Perl_grok_hex(aTHX_ pv, &len, &flags, NULL); OUTPUT: RETVAL UV Perl_grok_oct(string) SV *string PREINIT: char *pv; I32 flags; STRLEN len; CODE: pv = SvPV(string, len); RETVAL = Perl_grok_oct(aTHX_ pv, &len, &flags, NULL); OUTPUT: RETVAL =tests plan => 10 ok(&Devel::PPPort::grok_number("42"), 42); ok(!defined(&Devel::PPPort::grok_number("A"))); ok(&Devel::PPPort::grok_bin("10000001"), 129); ok(&Devel::PPPort::grok_hex("deadbeef"), 0xdeadbeef); ok(&Devel::PPPort::grok_oct("377"), 255); ok(&Devel::PPPort::Perl_grok_number("42"), 42); ok(!defined(&Devel::PPPort::Perl_grok_number("A"))); ok(&Devel::PPPort::Perl_grok_bin("10000001"), 129); ok(&Devel::PPPort::Perl_grok_hex("deadbeef"), 0xdeadbeef); ok(&Devel::PPPort::Perl_grok_oct("377"), 255);