| /******************************************************************************* |
| * Copyright (c) 2009-2011 Luaj.org. All rights reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to deal |
| * in the Software without restriction, including without limitation the rights |
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| * copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in |
| * all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| * THE SOFTWARE. |
| ******************************************************************************/ |
| package org.luaj.vm3.lib; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.IOException; |
| import java.text.DecimalFormat; |
| |
| import org.luaj.vm3.LuaClosure; |
| import org.luaj.vm3.Buffer; |
| import org.luaj.vm3.LuaString; |
| import org.luaj.vm3.LuaTable; |
| import org.luaj.vm3.LuaValue; |
| import org.luaj.vm3.Varargs; |
| import org.luaj.vm3.compiler.DumpState; |
| |
| /** |
| * Subclass of {@link LibFunction} which implements the lua standard {@code string} |
| * library. |
| * <p> |
| * Typically, this library is included as part of a call to either |
| * {@link JsePlatform#standardGlobals()} or {@link JmePlatform#standardGlobals()} |
| * <pre> {@code |
| * Globals globals = JsePlatform.standardGlobals(); |
| * System.out.println( globals.get("string").get("upper").call( LuaValue.valueOf("abcde") ) ); |
| * } </pre> |
| * <p> |
| * To instantiate and use it directly, |
| * link it into your globals table via {@link LuaValue#load(LuaValue)} using code such as: |
| * <pre> {@code |
| * Globals globals = new Globals(); |
| * globals.load(new JseBaseLib()); |
| * globals.load(new PackageLib()); |
| * globals.load(new StringLib()); |
| * System.out.println( globals.get("string").get("upper").call( LuaValue.valueOf("abcde") ) ); |
| * } </pre> |
| * <p> |
| * This is a direct port of the corresponding library in C. |
| * @see LibFunction |
| * @see JsePlatform |
| * @see JmePlatform |
| * @see <a href="http://www.lua.org/manual/5.2/manual.html#6.4">Lua 5.2 String Lib Reference</a> |
| */ |
| public class StringLib extends TwoArgFunction { |
| |
| public static LuaTable instance; |
| |
/**
 * Creates the library loader. No state is set up here; the {@code string}
 * table is built when {@code call(modname, env)} is invoked.
 */
public StringLib() {
    super();
}
| |
| public LuaValue call(LuaValue modname, LuaValue env) { |
| LuaTable t = new LuaTable(); |
| bind(t, StringLib1.class, new String[] { |
| "dump", "len", "lower", "reverse", "upper", } ); |
| bind(t, StringLibV.class, new String[] { |
| "byte", "char", "find", "format", |
| "gmatch", "gsub", "match", "rep", |
| "sub"} ); |
| env.set("string", t); |
| instance = t; |
| if ( LuaString.s_metatable == null ) |
| LuaString.s_metatable = tableOf( new LuaValue[] { INDEX, t } ); |
| env.get("package").get("loaded").set("string", t); |
| return t; |
| } |
| |
| static final class StringLib1 extends OneArgFunction { |
| public LuaValue call(LuaValue arg) { |
| switch ( opcode ) { |
| case 0: return dump(arg); // dump (function) |
| case 1: return StringLib.len(arg); // len (function) |
| case 2: return lower(arg); // lower (function) |
| case 3: return reverse(arg); // reverse (function) |
| case 4: return upper(arg); // upper (function) |
| } |
| return NIL; |
| } |
| } |
| |
| static final class StringLibV extends VarArgFunction { |
| public Varargs invoke(Varargs args) { |
| switch ( opcode ) { |
| case 0: return StringLib.byte_( args ); |
| case 1: return StringLib.char_( args ); |
| case 2: return StringLib.find( args ); |
| case 3: return StringLib.format( args ); |
| case 4: return StringLib.gmatch( args ); |
| case 5: return StringLib.gsub( args ); |
| case 6: return StringLib.match( args ); |
| case 7: return StringLib.rep( args ); |
| case 8: return StringLib.sub( args ); |
| } |
| return NONE; |
| } |
| } |
| |
| /** |
| * string.byte (s [, i [, j]]) |
| * |
| * Returns the internal numerical codes of the |
| * characters s[i], s[i+1], ..., s[j]. The default value for i is 1; the |
| * default value for j is i. |
| * |
| * Note that numerical codes are not necessarily portable across platforms. |
| * |
| * @param args the calling args |
| */ |
| static Varargs byte_( Varargs args ) { |
| LuaString s = args.checkstring(1); |
| int l = s.m_length; |
| int posi = posrelat( args.optint(2,1), l ); |
| int pose = posrelat( args.optint(3,posi), l ); |
| int n,i; |
| if (posi <= 0) posi = 1; |
| if (pose > l) pose = l; |
| if (posi > pose) return NONE; /* empty interval; return no values */ |
| n = (int)(pose - posi + 1); |
| if (posi + n <= pose) /* overflow? */ |
| error("string slice too long"); |
| LuaValue[] v = new LuaValue[n]; |
| for (i=0; i<n; i++) |
| v[i] = valueOf(s.luaByte(posi+i-1)); |
| return varargsOf(v); |
| } |
| |
| /** |
| * string.char (...) |
| * |
| * Receives zero or more integers. Returns a string with length equal |
| * to the number of arguments, in which each character has the internal |
| * numerical code equal to its corresponding argument. |
| * |
| * Note that numerical codes are not necessarily portable across platforms. |
| * |
| * @param args the calling VM |
| */ |
| public static Varargs char_( Varargs args) { |
| int n = args.narg(); |
| byte[] bytes = new byte[n]; |
| for ( int i=0, a=1; i<n; i++, a++ ) { |
| int c = args.checkint(a); |
| if (c<0 || c>=256) argerror(a, "invalid value"); |
| bytes[i] = (byte) c; |
| } |
| return LuaString.valueOf( bytes ); |
| } |
| |
| /** |
| * string.dump (function) |
| * |
| * Returns a string containing a binary representation of the given function, |
| * so that a later loadstring on this string returns a copy of the function. |
| * function must be a Lua function without upvalues. |
| * |
| * TODO: port dumping code as optional add-on |
| */ |
| static LuaValue dump( LuaValue arg ) { |
| LuaValue f = arg.checkfunction(); |
| ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
| try { |
| DumpState.dump( ((LuaClosure)f).p, baos, true ); |
| return LuaString.valueOf(baos.toByteArray()); |
| } catch (IOException e) { |
| return error( e.getMessage() ); |
| } |
| } |
| |
| /** |
| * string.find (s, pattern [, init [, plain]]) |
| * |
| * Looks for the first match of pattern in the string s. |
| * If it finds a match, then find returns the indices of s |
| * where this occurrence starts and ends; otherwise, it returns nil. |
| * A third, optional numerical argument init specifies where to start the search; |
| * its default value is 1 and may be negative. A value of true as a fourth, |
| * optional argument plain turns off the pattern matching facilities, |
| * so the function does a plain "find substring" operation, |
| * with no characters in pattern being considered "magic". |
| * Note that if plain is given, then init must be given as well. |
| * |
| * If the pattern has captures, then in a successful match the captured values |
| * are also returned, after the two indices. |
| */ |
| static Varargs find( Varargs args ) { |
| return str_find_aux( args, true ); |
| } |
| |
| /** |
| * string.format (formatstring, ...) |
| * |
| * Returns a formatted version of its variable number of arguments following |
| * the description given in its first argument (which must be a string). |
| * The format string follows the same rules as the printf family of standard C functions. |
| * The only differences are that the options/modifiers *, l, L, n, p, and h are not supported |
| * and that there is an extra option, q. The q option formats a string in a form suitable |
| * to be safely read back by the Lua interpreter: the string is written between double quotes, |
| * and all double quotes, newlines, embedded zeros, and backslashes in the string are correctly |
| * escaped when written. For instance, the call |
| * string.format('%q', 'a string with "quotes" and \n new line') |
| * |
| * will produce the string: |
| * "a string with \"quotes\" and \ |
| * new line" |
| * |
| * The options c, d, E, e, f, g, G, i, o, u, X, and x all expect a number as argument, |
| * whereas q and s expect a string. |
| * |
| * This function does not accept string values containing embedded zeros, |
| * except as arguments to the q option. |
| */ |
| static Varargs format( Varargs args ) { |
| LuaString fmt = args.checkstring( 1 ); |
| final int n = fmt.length(); |
| Buffer result = new Buffer(n); |
| int arg = 1; |
| int c; |
| |
| for ( int i = 0; i < n; ) { |
| switch ( c = fmt.luaByte( i++ ) ) { |
| case '\n': |
| result.append( "\n" ); |
| break; |
| default: |
| result.append( (byte) c ); |
| break; |
| case L_ESC: |
| if ( i < n ) { |
| if ( ( c = fmt.luaByte( i ) ) == L_ESC ) { |
| ++i; |
| result.append( (byte)L_ESC ); |
| } else { |
| arg++; |
| args.checkvalue(arg); |
| FormatDesc fdsc = new FormatDesc(args, fmt, i ); |
| i += fdsc.length; |
| switch ( fdsc.conversion ) { |
| case 'c': |
| fdsc.format( result, (byte)args.checkint( arg ) ); |
| break; |
| case 'i': |
| case 'd': |
| // Still not right, but works better |
| double sNum = args.checkdouble( arg ); |
| long sINum = args.checklong( arg ); |
| double sDiff = sNum - sINum; |
| args.argcheck(-1 < sDiff && sDiff < 1, arg, "not a number in proper range"); |
| fdsc.format( result, args.checklong( arg ) ); |
| break; |
| case 'o': |
| case 'u': |
| case 'x': |
| case 'X': |
| // Still not right, but works better |
| double uNum = args.checkdouble( arg ); |
| long uINum = args.checklong( arg ); |
| double uDiff = uNum - uINum; |
| args.argcheck(-1 < uDiff && uDiff < 1 && uINum >= 0, arg, "not a non-negative number in proper range"); |
| fdsc.format( result, args.checklong( arg ) ); |
| break; |
| case 'e': |
| case 'E': |
| case 'f': |
| case 'g': |
| case 'G': |
| fdsc.format( result, args.checkdouble( arg ) ); |
| break; |
| case 'q': |
| addquoted( result, args.checkstring( arg ) ); |
| break; |
| case 's': { |
| LuaString s = args.checkstring( arg ); |
| if ( fdsc.precision == -1 && s.length() >= 100 ) { |
| result.append( s ); |
| } else { |
| fdsc.format( result, s ); |
| } |
| } break; |
| default: |
| error("invalid option '%"+(char)fdsc.conversion+"' to 'format'"); |
| break; |
| } |
| } |
| } |
| else |
| error("invalid option '%' to 'format'"); |
| } |
| } |
| |
| return result.tostring(); |
| } |
| |
| private static void addquoted(Buffer buf, LuaString s) { |
| int c; |
| buf.append( (byte) '"' ); |
| for ( int i = 0, n = s.length(); i < n; i++ ) { |
| switch ( c = s.luaByte( i ) ) { |
| case '"': case '\\': case '\n': |
| buf.append( (byte)'\\' ); |
| buf.append( (byte)c ); |
| break; |
| default: |
| if (c <= 0x1F || c == 0x7F) { |
| buf.append( (byte) '\\' ); |
| if (i+1 == n || s.luaByte(i+1) < '0' || s.luaByte(i+1) > '9') { |
| buf.append(Integer.toString(c)); |
| } else { |
| buf.append( (byte) '0' ); |
| buf.append( (byte) (char) ('0' + c / 10) ); |
| buf.append( (byte) (char) ('0' + c % 10) ); |
| } |
| } else { |
| buf.append((byte) c); |
| } |
| break; |
| } |
| } |
| buf.append( (byte) '"' ); |
| } |
| |
| private static final String FLAGS = "-+ #0"; |
| |
| static class FormatDesc { |
| |
| private boolean leftAdjust; |
| private boolean zeroPad; |
| private boolean explicitPlus; |
| private boolean space; |
| private boolean alternateForm; |
| private static final int MAX_FLAGS = 5; |
| |
| private int width; |
| private int precision; |
| |
| public final int conversion; |
| public final int length; |
| |
| public FormatDesc(Varargs args, LuaString strfrmt, final int start) { |
| int p = start, n = strfrmt.length(); |
| int c = 0; |
| |
| boolean moreFlags = true; |
| while ( moreFlags ) { |
| switch ( c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 ) ) { |
| case '-': leftAdjust = true; break; |
| case '+': explicitPlus = true; break; |
| case ' ': space = true; break; |
| case '#': alternateForm = true; break; |
| case '0': zeroPad = true; break; |
| default: moreFlags = false; break; |
| } |
| } |
| if ( p - start - 1 > MAX_FLAGS ) |
| error("invalid format (repeated flags)"); |
| |
| width = -1; |
| if ( Character.isDigit( (char)c ) ) { |
| width = c - '0'; |
| c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 ); |
| if ( Character.isDigit( (char) c ) ) { |
| width = width * 10 + (c - '0'); |
| c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 ); |
| } |
| } |
| |
| precision = -1; |
| if ( c == '.' ) { |
| c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 ); |
| if ( Character.isDigit( (char) c ) ) { |
| precision = c - '0'; |
| c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 ); |
| if ( Character.isDigit( (char) c ) ) { |
| precision = precision * 10 + (c - '0'); |
| c = ( (p < n) ? strfrmt.luaByte( p++ ) : 0 ); |
| } |
| } |
| } |
| |
| if ( Character.isDigit( (char) c ) ) |
| error("invalid format (width or precision too long)"); |
| |
| zeroPad &= !leftAdjust; // '-' overrides '0' |
| conversion = c; |
| length = p - start; |
| } |
| |
| public void format(Buffer buf, byte c) { |
| if ( !leftAdjust ) |
| pad( buf, ' ', width - 1 ); |
| |
| buf.append(c); |
| |
| if ( leftAdjust ) |
| pad( buf, ' ', width - 1 ); |
| } |
| |
| public void format(Buffer buf, long number) { |
| String digits; |
| |
| if ( number == 0 && precision == 0 ) { |
| digits = ""; |
| } else { |
| int radix; |
| switch ( conversion ) { |
| case 'x': |
| case 'X': |
| radix = 16; |
| break; |
| case 'o': |
| radix = 8; |
| break; |
| default: |
| radix = 10; |
| break; |
| } |
| digits = Long.toString( number, radix ); |
| if ( conversion == 'X' ) |
| digits = digits.toUpperCase(); |
| } |
| |
| int minwidth = digits.length(); |
| int ndigits = minwidth; |
| int nzeros; |
| |
| boolean allowPlusSpace = conversion == 'd' || conversion == 'i'; |
| |
| if ( number < 0 ) { |
| ndigits--; |
| } else if ( allowPlusSpace && (explicitPlus || space) ) { |
| minwidth++; |
| } |
| |
| if ( alternateForm ) { |
| switch ( conversion ) { |
| case 'o': |
| minwidth++; |
| break; |
| case 'x': |
| case 'X': |
| minwidth+=2; |
| break; |
| } |
| } |
| |
| if ( precision > ndigits ) |
| nzeros = precision - ndigits; |
| else if ( precision == -1 && zeroPad && width > minwidth ) |
| nzeros = width - minwidth; |
| else |
| nzeros = 0; |
| |
| minwidth += nzeros; |
| int nspaces = width > minwidth ? width - minwidth : 0; |
| |
| if ( !leftAdjust ) |
| pad( buf, ' ', nspaces ); |
| |
| if ( number < 0 ) { |
| if ( nzeros > 0 ) { |
| buf.append( (byte)'-' ); |
| digits = digits.substring( 1 ); |
| } |
| } else if ( allowPlusSpace && explicitPlus ) { |
| buf.append( (byte)'+' ); |
| } else if ( allowPlusSpace && space ) { |
| buf.append( (byte)' ' ); |
| } |
| |
| if ( alternateForm ) { |
| switch ( conversion ) { |
| case 'o': |
| buf.append( (byte)'0' ); |
| break; |
| case 'x': |
| buf.append( "0x" ); |
| break; |
| case 'X': |
| buf.append( "0X" ); |
| break; |
| } |
| } |
| |
| if ( nzeros > 0 ) |
| pad( buf, '0', nzeros ); |
| |
| buf.append( digits ); |
| |
| if ( leftAdjust ) |
| pad( buf, ' ', nspaces ); |
| } |
| |
| public void format(Buffer buf, double number) { |
| // TODO: force positive sign |
| |
| int precise = precision == -1 ? 6 : precision; |
| String addDot = alternateForm || precise > 0 ? "." : ""; |
| |
| DecimalFormat scientificFormat = new DecimalFormat("0" + addDot + new String(new char[precise]).replace("\0", "0") + "E00"); |
| DecimalFormat floatingFormat = new DecimalFormat("0" + addDot + new String(new char[precise]).replace("\0", "0")); |
| |
| String digits; |
| |
| switch ( conversion ) { |
| case 'e': |
| case 'E': |
| digits = scientificFormat.format( number ); |
| break; |
| case 'f': |
| digits = floatingFormat.format(number); |
| break; |
| case 'g': |
| case 'G': |
| default: |
| // TODO: g, G |
| // TODO: precision |
| digits = String.valueOf( number ); |
| break; |
| } |
| |
| if ( conversion == 'e' || conversion == 'g' ) |
| digits = digits.toLowerCase(); |
| |
| int minwidth = digits.length(); |
| int ndigits = minwidth; |
| int nzeros; |
| |
| if ( number < 0 ) { |
| ndigits--; |
| } else if ( explicitPlus || space ) { |
| minwidth++; |
| } |
| |
| if ( precision > ndigits ) |
| nzeros = precision - ndigits; |
| else if ( precision == -1 && zeroPad && width > minwidth ) |
| nzeros = width - minwidth; |
| else |
| nzeros = 0; |
| |
| minwidth += nzeros; |
| int nspaces = width > minwidth ? width - minwidth : 0; |
| |
| if ( !leftAdjust ) |
| pad( buf, ' ', nspaces ); |
| |
| if ( number < 0 ) { |
| if ( nzeros > 0 ) { |
| buf.append( (byte)'-' ); |
| digits = digits.substring( 1 ); |
| } |
| } else if ( explicitPlus ) { |
| buf.append( (byte)'+' ); |
| } else if ( space ) { |
| buf.append( (byte)' ' ); |
| } |
| |
| if ( nzeros > 0 ) |
| pad( buf, '0', nzeros ); |
| |
| buf.append( digits ); |
| |
| if ( leftAdjust ) |
| pad( buf, ' ', nspaces ); |
| } |
| |
| public void format(Buffer buf, LuaString s) { |
| int nullindex = s.indexOf( (byte)'\0', 0 ); |
| if ( nullindex != -1 ) |
| s = s.substring( 0, nullindex ); |
| |
| int newLength = precision == -1 ? s.length() : Math.min(precision, s.length()); |
| |
| if ( !leftAdjust ) |
| pad( buf, ' ', width - newLength ); |
| |
| buf.append(s.substring(0, newLength)); |
| |
| if ( leftAdjust ) |
| pad( buf, ' ', width - newLength ); |
| } |
| |
| public static final void pad(Buffer buf, char c, int n) { |
| byte b = (byte)c; |
| while ( n-- > 0 ) |
| buf.append(b); |
| } |
| } |
| |
| /** |
| * string.gmatch (s, pattern) |
| * |
| * Returns an iterator function that, each time it is called, returns the next captures |
| * from pattern over string s. If pattern specifies no captures, then the |
| * whole match is produced in each call. |
| * |
| * As an example, the following loop |
| * s = "hello world from Lua" |
| * for w in string.gmatch(s, "%a+") do |
| * print(w) |
| * end |
| * |
| * will iterate over all the words from string s, printing one per line. |
| * The next example collects all pairs key=value from the given string into a table: |
| * t = {} |
| * s = "from=world, to=Lua" |
| * for k, v in string.gmatch(s, "(%w+)=(%w+)") do |
| * t[k] = v |
| * end |
| * |
| * For this function, a '^' at the start of a pattern does not work as an anchor, |
| * as this would prevent the iteration. |
| */ |
| static Varargs gmatch( Varargs args ) { |
| LuaString src = args.checkstring( 1 ); |
| LuaString pat = args.checkstring( 2 ); |
| return new GMatchAux(args, src, pat); |
| } |
| |
| static class GMatchAux extends VarArgFunction { |
| private final int srclen; |
| private final MatchState ms; |
| private int soffset; |
| public GMatchAux(Varargs args, LuaString src, LuaString pat) { |
| this.srclen = src.length(); |
| this.ms = new MatchState(args, src, pat); |
| this.soffset = 0; |
| } |
| public Varargs invoke(Varargs args) { |
| for ( ; soffset<srclen; soffset++ ) { |
| ms.reset(); |
| int res = ms.match(soffset, 0); |
| if ( res >=0 ) { |
| int soff = soffset; |
| soffset = res; |
| return ms.push_captures( true, soff, res ); |
| } |
| } |
| return NIL; |
| } |
| } |
| |
| |
| /** |
| * string.gsub (s, pattern, repl [, n]) |
| * Returns a copy of s in which all (or the first n, if given) occurrences of the |
| * pattern have been replaced by a replacement string specified by repl, which |
| * may be a string, a table, or a function. gsub also returns, as its second value, |
| * the total number of matches that occurred. |
| * |
| * If repl is a string, then its value is used for replacement. |
| * The character % works as an escape character: any sequence in repl of the form %n, |
| * with n between 1 and 9, stands for the value of the n-th captured substring (see below). |
| * The sequence %0 stands for the whole match. The sequence %% stands for a single %. |
| * |
| * If repl is a table, then the table is queried for every match, using the first capture |
| * as the key; if the pattern specifies no captures, then the whole match is used as the key. |
| * |
| * If repl is a function, then this function is called every time a match occurs, |
| * with all captured substrings passed as arguments, in order; if the pattern specifies |
| * no captures, then the whole match is passed as a sole argument. |
| * |
| * If the value returned by the table query or by the function call is a string or a number, |
| * then it is used as the replacement string; otherwise, if it is false or nil, |
| * then there is no replacement (that is, the original match is kept in the string). |
| * |
| * Here are some examples: |
| * x = string.gsub("hello world", "(%w+)", "%1 %1") |
| * --> x="hello hello world world" |
| * |
| * x = string.gsub("hello world", "%w+", "%0 %0", 1) |
| * --> x="hello hello world" |
| * |
| * x = string.gsub("hello world from Lua", "(%w+)%s*(%w+)", "%2 %1") |
| * --> x="world hello Lua from" |
| * |
| * x = string.gsub("home = $HOME, user = $USER", "%$(%w+)", os.getenv) |
| * --> x="home = /home/roberto, user = roberto" |
| * |
| * x = string.gsub("4+5 = $return 4+5$", "%$(.-)%$", function (s) |
| * return loadstring(s)() |
| * end) |
| * --> x="4+5 = 9" |
| * |
| * local t = {name="lua", version="5.1"} |
| * x = string.gsub("$name-$version.tar.gz", "%$(%w+)", t) |
| * --> x="lua-5.1.tar.gz" |
| */ |
| static Varargs gsub( Varargs args ) { |
| LuaString src = args.checkstring( 1 ); |
| final int srclen = src.length(); |
| LuaString p = args.checkstring( 2 ); |
| LuaValue repl = args.arg( 3 ); |
| int max_s = args.optint( 4, srclen + 1 ); |
| final boolean anchor = p.length() > 0 && p.charAt( 0 ) == '^'; |
| |
| Buffer lbuf = new Buffer( srclen ); |
| MatchState ms = new MatchState( args, src, p ); |
| |
| int soffset = 0; |
| int n = 0; |
| while ( n < max_s ) { |
| ms.reset(); |
| int res = ms.match( soffset, anchor ? 1 : 0 ); |
| if ( res != -1 ) { |
| n++; |
| ms.add_value( lbuf, soffset, res, repl ); |
| } |
| if ( res != -1 && res > soffset ) |
| soffset = res; |
| else if ( soffset < srclen ) |
| lbuf.append( (byte) src.luaByte( soffset++ ) ); |
| else |
| break; |
| if ( anchor ) |
| break; |
| } |
| lbuf.append( src.substring( soffset, srclen ) ); |
| return varargsOf(lbuf.tostring(), valueOf(n)); |
| } |
| |
| /** |
| * string.len (s) |
| * |
| * Receives a string and returns its length. The empty string "" has length 0. |
| * Embedded zeros are counted, so "a\000bc\000" has length 5. |
| */ |
| static LuaValue len( LuaValue arg ) { |
| return arg.checkstring().len(); |
| } |
| |
| /** |
| * string.lower (s) |
| * |
| * Receives a string and returns a copy of this string with all uppercase letters |
| * changed to lowercase. All other characters are left unchanged. |
| * The definition of what an uppercase letter is depends on the current locale. |
| */ |
| static LuaValue lower( LuaValue arg ) { |
| return valueOf( arg.checkjstring().toLowerCase() ); |
| } |
| |
| /** |
| * string.match (s, pattern [, init]) |
| * |
| * Looks for the first match of pattern in the string s. If it finds one, |
| * then match returns the captures from the pattern; otherwise it returns |
| * nil. If pattern specifies no captures, then the whole match is returned. |
| * A third, optional numerical argument init specifies where to start the |
| * search; its default value is 1 and may be negative. |
| */ |
| static Varargs match( Varargs args ) { |
| return str_find_aux( args, false ); |
| } |
| |
| /** |
| * string.rep (s, n) |
| * |
| * Returns a string that is the concatenation of n copies of the string s. |
| */ |
| static Varargs rep( Varargs args ) { |
| LuaString s = args.checkstring( 1 ); |
| int n = Math.max( args.checkint( 2 ), 0 ); |
| final byte[] bytes = new byte[ s.length() * n ]; |
| int len = s.length(); |
| for ( int offset = 0; offset < bytes.length; offset += len ) { |
| s.copyInto( 0, bytes, offset, len ); |
| } |
| return LuaString.valueOf( bytes ); |
| } |
| |
| /** |
| * string.reverse (s) |
| * |
| * Returns a string that is the string s reversed. |
| */ |
| static LuaValue reverse( LuaValue arg ) { |
| LuaString s = arg.checkstring(); |
| int n = s.length(); |
| byte[] b = new byte[n]; |
| for ( int i=0, j=n-1; i<n; i++, j-- ) |
| b[j] = (byte) s.luaByte(i); |
| return LuaString.valueOf( b ); |
| } |
| |
| /** |
| * string.sub (s, i [, j]) |
| * |
| * Returns the substring of s that starts at i and continues until j; |
| * i and j may be negative. If j is absent, then it is assumed to be equal to -1 |
| * (which is the same as the string length). In particular, the call |
| * string.sub(s,1,j) |
| * returns a prefix of s with length j, and |
| * string.sub(s, -i) |
| * returns a suffix of s with length i. |
| */ |
| static Varargs sub( Varargs args ) { |
| final LuaString s = args.checkstring( 1 ); |
| final int l = s.length(); |
| |
| int start = posrelat( args.checkint( 2 ), l ); |
| int end = posrelat( args.optint( 3, -1 ), l ); |
| |
| if ( start < 1 ) |
| start = 1; |
| if ( end > l ) |
| end = l; |
| |
| if ( start <= end ) { |
| return s.substring( start-1 , end ); |
| } else { |
| return EMPTYSTRING; |
| } |
| } |
| |
| /** |
| * string.upper (s) |
| * |
| * Receives a string and returns a copy of this string with all lowercase letters |
| * changed to uppercase. All other characters are left unchanged. |
| * The definition of what a lowercase letter is depends on the current locale. |
| */ |
| static LuaValue upper( LuaValue arg ) { |
| return valueOf(arg.checkjstring().toUpperCase()); |
| } |
| |
| /** |
| * This utility method implements both string.find and string.match. |
| */ |
| static Varargs str_find_aux( Varargs args, boolean find ) { |
| LuaString s = args.checkstring( 1 ); |
| LuaString pat = args.checkstring( 2 ); |
| int init = args.optint( 3, 1 ); |
| |
| if ( init > 0 ) { |
| init = Math.min( init - 1, s.length() ); |
| } else if ( init < 0 ) { |
| init = Math.max( 0, s.length() + init ); |
| } |
| |
| boolean fastMatch = find && ( args.arg(4).toboolean() || pat.indexOfAny( SPECIALS ) == -1 ); |
| |
| if ( fastMatch ) { |
| int result = s.indexOf( pat, init ); |
| if ( result != -1 ) { |
| return varargsOf( valueOf(result+1), valueOf(result+pat.length()) ); |
| } |
| } else { |
| MatchState ms = new MatchState( args, s, pat ); |
| |
| boolean anchor = false; |
| int poff = 0; |
| if ( pat.luaByte( 0 ) == '^' ) { |
| anchor = true; |
| poff = 1; |
| } |
| |
| int soff = init; |
| do { |
| int res; |
| ms.reset(); |
| if ( ( res = ms.match( soff, poff ) ) != -1 ) { |
| if ( find ) { |
| return varargsOf( valueOf(soff+1), valueOf(res), ms.push_captures( false, soff, res )); |
| } else { |
| return ms.push_captures( true, soff, res ); |
| } |
| } |
| } while ( soff++ < s.length() && !anchor ); |
| } |
| return NIL; |
| } |
| |
| private static int posrelat( int pos, int len ) { |
| return ( pos >= 0 ) ? pos : len + pos + 1; |
| } |
| |
| // Pattern matching implementation |
| |
| private static final int L_ESC = '%'; |
| private static final LuaString SPECIALS = valueOf("^$*+?.([%-"); |
| private static final int MAX_CAPTURES = 32; |
| |
| private static final int CAP_UNFINISHED = -1; |
| private static final int CAP_POSITION = -2; |
| |
| private static final byte MASK_ALPHA = 0x01; |
| private static final byte MASK_LOWERCASE = 0x02; |
| private static final byte MASK_UPPERCASE = 0x04; |
| private static final byte MASK_DIGIT = 0x08; |
| private static final byte MASK_PUNCT = 0x10; |
| private static final byte MASK_SPACE = 0x20; |
| private static final byte MASK_CONTROL = 0x40; |
| private static final byte MASK_HEXDIGIT = (byte)0x80; |
| |
| private static final byte[] CHAR_TABLE; |
| |
// Fills CHAR_TABLE with the character-class bit masks for every byte value 0..255.
static {
    CHAR_TABLE = new byte[256];

    for ( int i = 0; i < 256; ++i ) {
        final char c = (char) i;
        CHAR_TABLE[i] = (byte)( ( Character.isDigit( c ) ? MASK_DIGIT : 0 ) |
                        ( Character.isLowerCase( c ) ? MASK_LOWERCASE : 0 ) |
                        ( Character.isUpperCase( c ) ? MASK_UPPERCASE : 0 ) |
                        ( ( c < ' ' || c == 0x7F ) ? MASK_CONTROL : 0 ) );
        if ( ( c >= 'a' && c <= 'f' ) || ( c >= 'A' && c <= 'F' ) || ( c >= '0' && c <= '9' ) ) {
            CHAR_TABLE[i] |= MASK_HEXDIGIT;
        }
        // All four ASCII punctuation ranges: !../  :..@  [..`  {..~
        if ( ( c >= '!' && c <= '/' ) || ( c >= ':' && c <= '@' )
                || ( c >= '[' && c <= '`' ) || ( c >= '{' && c <= '~' ) ) {
            CHAR_TABLE[i] |= MASK_PUNCT;
        }
        if ( ( CHAR_TABLE[i] & ( MASK_LOWERCASE | MASK_UPPERCASE ) ) != 0 ) {
            CHAR_TABLE[i] |= MASK_ALPHA;
        }
    }

    // Whitespace: space, CR, LF, TAB, VT and FF.
    CHAR_TABLE[' '] = MASK_SPACE;
    CHAR_TABLE['\r'] |= MASK_SPACE;
    CHAR_TABLE['\n'] |= MASK_SPACE;
    CHAR_TABLE['\t'] |= MASK_SPACE;
    CHAR_TABLE[0x0B /* '\v' */ ] |= MASK_SPACE; // vertical tab is 0x0B; 0x0C is form feed
    CHAR_TABLE['\f'] |= MASK_SPACE;
}
| |
| static class MatchState { |
| final LuaString s; |
| final LuaString p; |
| final Varargs args; |
| int level; |
| int[] cinit; |
| int[] clen; |
| |
/**
 * Creates the matching state for one subject/pattern pair, with room for
 * MAX_CAPTURES captures and no capture open.
 *
 * @param args the args of the originating library call
 * @param s the subject string
 * @param pattern the pattern to match with
 */
MatchState(Varargs args, LuaString s, LuaString pattern) {
    this.args = args;
    this.s = s;
    this.p = pattern;
    this.cinit = new int[MAX_CAPTURES];
    this.clen = new int[MAX_CAPTURES];
    this.level = 0;
}
| |
| void reset() { |
| level = 0; |
| } |
| |
| private void add_s( Buffer lbuf, LuaString news, int soff, int e ) { |
| int l = news.length(); |
| for ( int i = 0; i < l; ++i ) { |
| byte b = (byte) news.luaByte( i ); |
| if ( b != L_ESC ) { |
| lbuf.append( (byte) b ); |
| } else { |
| ++i; // skip ESC |
| b = (byte) news.luaByte( i ); |
| if ( !Character.isDigit( (char) b ) ) { |
| lbuf.append( b ); |
| } else if ( b == '0' ) { |
| lbuf.append( s.substring( soff, e ) ); |
| } else { |
| lbuf.append( push_onecapture( b - '1', soff, e ).strvalue() ); |
| } |
| } |
| } |
| } |
| |
| public void add_value( Buffer lbuf, int soffset, int end, LuaValue repl ) { |
| switch ( repl.type() ) { |
| case LuaValue.TSTRING: |
| case LuaValue.TNUMBER: |
| add_s( lbuf, repl.strvalue(), soffset, end ); |
| return; |
| |
| case LuaValue.TFUNCTION: |
| repl = repl.invoke( push_captures( true, soffset, end ) ).arg1(); |
| break; |
| |
| case LuaValue.TTABLE: |
| // Need to call push_onecapture here for the error checking |
| repl = repl.get( push_onecapture( 0, soffset, end ) ); |
| break; |
| |
| default: |
| error( "bad argument: string/function/table expected" ); |
| return; |
| } |
| |
| if ( !repl.toboolean() ) { |
| repl = s.substring( soffset, end ); |
| } else if ( ! repl.isstring() ) { |
| error( "invalid replacement value (a "+repl.typename()+")" ); |
| } |
| lbuf.append( repl.strvalue() ); |
| } |
| |
| Varargs push_captures( boolean wholeMatch, int soff, int end ) { |
| int nlevels = ( this.level == 0 && wholeMatch ) ? 1 : this.level; |
| switch ( nlevels ) { |
| case 0: return NONE; |
| case 1: return push_onecapture( 0, soff, end ); |
| } |
| LuaValue[] v = new LuaValue[nlevels]; |
| for ( int i = 0; i < nlevels; ++i ) |
| v[i] = push_onecapture( i, soff, end ); |
| return varargsOf(v); |
| } |
| |
| private LuaValue push_onecapture( int i, int soff, int end ) { |
| if ( i >= this.level ) { |
| if ( i == 0 ) { |
| return s.substring( soff, end ); |
| } else { |
| return error( "invalid capture index" ); |
| } |
| } else { |
| int l = clen[i]; |
| if ( l == CAP_UNFINISHED ) { |
| return error( "unfinished capture" ); |
| } |
| if ( l == CAP_POSITION ) { |
| return valueOf( cinit[i] + 1 ); |
| } else { |
| int begin = cinit[i]; |
| return s.substring( begin, begin + l ); |
| } |
| } |
| } |
| |
| private int check_capture( int l ) { |
| l -= '1'; |
| if ( l < 0 || l >= level || this.clen[l] == CAP_UNFINISHED ) { |
| error("invalid capture index"); |
| } |
| return l; |
| } |
| |
| /** |
|  * Find the most recently opened capture that is still unfinished. |
|  * |
|  * @return index of the innermost unfinished capture; an error is reported |
|  *         when none is open |
|  */ |
| private int capture_to_close() { |
|     int idx = this.level - 1; |
|     while ( idx >= 0 ) { |
|         if ( clen[idx] == CAP_UNFINISHED ) { |
|             return idx; |
|         } |
|         idx--; |
|     } |
|     error("invalid pattern capture"); |
|     return 0; |
| } |
| |
| int classend( int poffset ) { |
| switch ( p.luaByte( poffset++ ) ) { |
| case L_ESC: |
| if ( poffset == p.length() ) { |
| error( "malformed pattern (ends with %)" ); |
| } |
| return poffset + 1; |
| |
| case '[': |
| if ( p.luaByte( poffset ) == '^' ) poffset++; |
| do { |
| if ( poffset == p.length() ) { |
| error( "malformed pattern (missing ])" ); |
| } |
| if ( p.luaByte( poffset++ ) == L_ESC && poffset != p.length() ) |
| poffset++; |
| } while ( p.luaByte( poffset ) != ']' ); |
| return poffset + 1; |
| default: |
| return poffset; |
| } |
| } |
| |
| /** |
|  * Test a character against a single-letter character class. |
|  * <p> |
|  * The class letter is compared case-insensitively; when {@code cl} differs |
|  * from its lower-case form the result of the class test is inverted. A |
|  * {@code cl} that is not one of the class letters is compared literally |
|  * with {@code c}. |
|  * |
|  * @param c  the character to classify |
|  * @param cl the class letter taken from the pattern |
|  * @return whether {@code c} matches the class |
|  */ |
| static boolean match_class( int c, int cl ) { |
|     final char lower = Character.toLowerCase( (char) cl ); |
|     final int flags = CHAR_TABLE[c]; |
| |
|     final int mask; |
|     switch ( lower ) { |
|     case 'a': mask = MASK_ALPHA; break; |
|     case 'd': mask = MASK_DIGIT; break; |
|     case 'l': mask = MASK_LOWERCASE; break; |
|     case 'u': mask = MASK_UPPERCASE; break; |
|     case 'c': mask = MASK_CONTROL; break; |
|     case 'p': mask = MASK_PUNCT; break; |
|     case 's': mask = MASK_SPACE; break; |
|     case 'w': mask = MASK_ALPHA | MASK_DIGIT; break; |
|     case 'x': mask = MASK_HEXDIGIT; break; |
|     case 'z': { |
|         // 'z' is not table driven: it matches only the zero byte. |
|         final boolean isZero = ( c == 0 ); |
|         return ( lower == cl ) ? isZero : !isZero; |
|     } |
|     default: |
|         return cl == c; |
|     } |
| |
|     final boolean member = ( flags & mask ) != 0; |
|     if ( lower == cl ) { |
|         return member; |
|     } |
|     return !member; |
| } |
| |
| boolean matchbracketclass( int c, int poff, int ec ) { |
| boolean sig = true; |
| if ( p.luaByte( poff + 1 ) == '^' ) { |
| sig = false; |
| poff++; |
| } |
| while ( ++poff < ec ) { |
| if ( p.luaByte( poff ) == L_ESC ) { |
| poff++; |
| if ( match_class( c, p.luaByte( poff ) ) ) |
| return sig; |
| } |
| else if ( ( p.luaByte( poff + 1 ) == '-' ) && ( poff + 2 < ec ) ) { |
| poff += 2; |
| if ( p.luaByte( poff - 2 ) <= c && c <= p.luaByte( poff ) ) |
| return sig; |
| } |
| else if ( p.luaByte( poff ) == c ) return sig; |
| } |
| return !sig; |
| } |
| |
| boolean singlematch( int c, int poff, int ep ) { |
| switch ( p.luaByte( poff ) ) { |
| case '.': return true; |
| case L_ESC: return match_class( c, p.luaByte( poff + 1 ) ); |
| case '[': return matchbracketclass( c, poff, ep - 1 ); |
| default: return p.luaByte( poff ) == c; |
| } |
| } |
| |
| /** |
|  * Perform pattern matching. If there is a match, returns offset into s |
|  * where match ends, otherwise returns -1. |
|  * <p> |
|  * The frontier item {@code %f[set]} matches the empty string at a position |
|  * where the previous character is not in the set and the next character is |
|  * in the set. The start and the end of the subject are treated as if they |
|  * were the character '\0'. |
|  * |
|  * @param soffset offset into the subject at which matching starts |
|  * @param poffset offset into the pattern of the next item to match |
|  * @return offset into the subject where the match ends, or -1 |
|  */ |
| int match( int soffset, int poffset ) { |
|     while ( true ) { |
|         // Check if we are at the end of the pattern - |
|         // equivalent to the '\0' case in the C version, but our pattern |
|         // string is not NUL-terminated. |
|         if ( poffset == p.length() ) |
|             return soffset; |
|         switch ( p.luaByte( poffset ) ) { |
|         case '(': |
|             // "()" is a position capture; anything else opens a capture. |
|             if ( ++poffset < p.length() && p.luaByte( poffset ) == ')' ) |
|                 return start_capture( soffset, poffset + 1, CAP_POSITION ); |
|             else |
|                 return start_capture( soffset, poffset, CAP_UNFINISHED ); |
|         case ')': |
|             return end_capture( soffset, poffset + 1 ); |
|         case L_ESC: |
|             if ( poffset + 1 == p.length() ) |
|                 error("malformed pattern (ends with '%')"); |
|             switch ( p.luaByte( poffset + 1 ) ) { |
|             case 'b': |
|                 // %bxy: balanced match; the item is four pattern bytes long. |
|                 soffset = matchbalance( soffset, poffset + 2 ); |
|                 if ( soffset == -1 ) return -1; |
|                 poffset += 4; |
|                 continue; |
|             case 'f': { |
|                 poffset += 2; |
|                 // Guard the read: the pattern may end right after "%f". |
|                 if ( poffset >= p.length() || p.luaByte( poffset ) != '[' ) { |
|                     error("Missing [ after %f in pattern"); |
|                 } |
|                 int setEnd = classend( poffset ); |
|                 // Both subject boundaries behave as the character '\0'. |
|                 int previous = ( soffset == 0 ) ? '\0' : s.luaByte( soffset - 1 ); |
|                 int next = ( soffset < s.length() ) ? s.luaByte( soffset ) : '\0'; |
|                 // Fail unless previous is outside the set and next is inside it. |
|                 if ( matchbracketclass( previous, poffset, setEnd - 1 ) || |
|                      !matchbracketclass( next, poffset, setEnd - 1 ) ) |
|                     return -1; |
|                 poffset = setEnd; |
|                 continue; |
|             } |
|             default: { |
|                 int c = p.luaByte( poffset + 1 ); |
|                 if ( Character.isDigit( (char) c ) ) { |
|                     // %1..%9: back-reference to an earlier capture. |
|                     soffset = match_capture( soffset, c ); |
|                     if ( soffset == -1 ) |
|                         return -1; |
|                     return match( soffset, poffset + 2 ); |
|                 } |
|             } |
|             } |
|             // Any other escape is an ordinary single-character item. |
|             break; |
|         case '$': |
|             // '$' anchors only when it is the last pattern character. |
|             if ( poffset + 1 == p.length() ) |
|                 return ( soffset == s.length() ) ? soffset : -1; |
|         } |
|         int ep = classend( poffset ); |
|         boolean m = soffset < s.length() && singlematch( s.luaByte( soffset ), poffset, ep ); |
|         int pc = ( ep < p.length() ) ? p.luaByte( ep ) : '\0'; |
| |
|         switch ( pc ) { |
|         case '?': |
|             int res; |
|             if ( m && ( ( res = match( soffset + 1, ep + 1 ) ) != -1 ) ) |
|                 return res; |
|             poffset = ep + 1; |
|             continue; |
|         case '*': |
|             return max_expand( soffset, poffset, ep ); |
|         case '+': |
|             return ( m ? max_expand( soffset + 1, poffset, ep ) : -1 ); |
|         case '-': |
|             return min_expand( soffset, poffset, ep ); |
|         default: |
|             if ( !m ) |
|                 return -1; |
|             soffset++; |
|             poffset = ep; |
|             continue; |
|         } |
|     } |
| } |
| |
| int max_expand( int soff, int poff, int ep ) { |
| int i = 0; |
| while ( soff + i < s.length() && |
| singlematch( s.luaByte( soff + i ), poff, ep ) ) |
| i++; |
| while ( i >= 0 ) { |
| int res = match( soff + i, ep + 1 ); |
| if ( res != -1 ) |
| return res; |
| i--; |
| } |
| return -1; |
| } |
| |
| int min_expand( int soff, int poff, int ep ) { |
| for ( ;; ) { |
| int res = match( soff, ep + 1 ); |
| if ( res != -1 ) |
| return res; |
| else if ( soff < s.length() && singlematch( s.luaByte( soff ), poff, ep ) ) |
| soff++; |
| else return -1; |
| } |
| } |
| |
| int start_capture( int soff, int poff, int what ) { |
| int res; |
| int level = this.level; |
| if ( level >= MAX_CAPTURES ) { |
| error( "too many captures" ); |
| } |
| cinit[ level ] = soff; |
| clen[ level ] = what; |
| this.level = level + 1; |
| if ( ( res = match( soff, poff ) ) == -1 ) |
| this.level--; |
| return res; |
| } |
| |
| int end_capture( int soff, int poff ) { |
| int l = capture_to_close(); |
| int res; |
| clen[l] = soff - cinit[l]; |
| if ( ( res = match( soff, poff ) ) == -1 ) |
| clen[l] = CAP_UNFINISHED; |
| return res; |
| } |
| |
| /** |
|  * Match a back-reference: the subject at {@code soff} must repeat the text |
|  * of an earlier, finished capture. |
|  * |
|  * @param soff offset into the subject where the repeated text must start |
|  * @param l    the digit character of the back-reference as written in the |
|  *             pattern |
|  * @return offset into the subject just past the repeated text, or -1 when |
|  *         the subject does not repeat the capture |
|  */ |
| int match_capture( int soff, int l ) { |
|     l = check_capture( l ); |
|     int len = clen[ l ]; |
|     // A negative stored length is a marker rather than a real span |
|     // (presumably CAP_POSITION here, since check_capture already rejects |
|     // CAP_UNFINISHED - confirm against the constant definitions). There is |
|     // no text to compare against, and without this guard the length test |
|     // below would pass and the result could lie before soff. |
|     if ( len < 0 ) |
|         return -1; |
|     if ( ( s.length() - soff ) >= len && |
|          LuaString.equals( s, cinit[l], s, soff, len ) ) |
|         return soff + len; |
|     else |
|         return -1; |
| } |
| |
| /** |
|  * Match a balanced item: the subject at {@code soff} must start with the |
|  * opening character and the match runs to the closing character that |
|  * balances it. |
|  * |
|  * @param soff offset into the subject where the balanced text must start |
|  * @param poff offset into the pattern of the opening character; the closing |
|  *             character follows it |
|  * @return offset into the subject just past the balancing closing |
|  *         character, or -1 when there is no balanced match |
|  */ |
| int matchbalance( int soff, int poff ) { |
|     final int patLen = p.length(); |
|     final boolean truncated = ( poff == patLen ) || ( poff + 1 == patLen ); |
|     if ( truncated ) { |
|         error( "unbalanced pattern" ); |
|     } |
| |
|     final int subjLen = s.length(); |
|     if ( soff >= subjLen ) { |
|         return -1; |
|     } |
| |
|     final int open = p.luaByte( poff ); |
|     final int close = p.luaByte( poff + 1 ); |
|     if ( s.luaByte( soff ) != open ) { |
|         return -1; |
|     } |
| |
|     // Nesting depth; the closing character is tested first, so when both |
|     // delimiters are the same character it always closes. |
|     int depth = 1; |
|     for ( int i = soff + 1; i < subjLen; i++ ) { |
|         final int ch = s.luaByte( i ); |
|         if ( ch == close ) { |
|             depth--; |
|             if ( depth == 0 ) { |
|                 return i + 1; |
|             } |
|         } else if ( ch == open ) { |
|             depth++; |
|         } |
|     } |
|     return -1; |
| } |
| } |
| } |