Skip to content

Commit 18547a4

Browse files
committed
Implement an alternative Charset caching strategy that does not load all Charsets at Tomcat start. This reduces start time by ~30ms and does not, based on the performance tests included in this commit, have a negative impact on runtime look-ups. It does require that the names of all supported Charsets are known to Tomcat at compile time. The code has been tested with a range of JVMs and a unit test is provided for testing new JVMs.
1 parent c24521d commit 18547a4

File tree

5 files changed

+348
-16
lines changed

5 files changed

+348
-16
lines changed

java/org/apache/tomcat/util/buf/B2CConverter.java

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,7 @@
2525
import java.nio.charset.CoderResult;
2626
import java.nio.charset.CodingErrorAction;
2727
import java.nio.charset.StandardCharsets;
28-
import java.util.HashMap;
2928
import java.util.Locale;
30-
import java.util.Map;
3129

3230
import org.apache.tomcat.util.res.StringManager;
3331

@@ -38,23 +36,14 @@ public class B2CConverter {
3836

3937
private static final StringManager sm = StringManager.getManager(B2CConverter.class);
4038

41-
private static final Map<String, Charset> encodingToCharsetCache =
42-
new HashMap<>();
43-
44-
// Protected so unit tests can use it
45-
protected static final int LEFTOVER_SIZE = 9;
39+
private static final CharsetCache charsetCache;
4640

4741
static {
48-
for (Charset charset: Charset.availableCharsets().values()) {
49-
encodingToCharsetCache.put(
50-
charset.name().toLowerCase(Locale.ENGLISH), charset);
51-
for (String alias : charset.aliases()) {
52-
encodingToCharsetCache.put(
53-
alias.toLowerCase(Locale.ENGLISH), charset);
54-
}
55-
}
42+
charsetCache = new CharsetCache();
5643
}
5744

45+
// Protected so unit tests can use it
46+
protected static final int LEFTOVER_SIZE = 9;
5847

5948
/**
6049
* Obtain the Charset for the given encoding
@@ -71,7 +60,7 @@ public static Charset getCharset(String enc) throws UnsupportedEncodingException
7160
// Encoding names should all be ASCII
7261
String lowerCaseEnc = enc.toLowerCase(Locale.ENGLISH);
7362

74-
Charset charset = encodingToCharsetCache.get(lowerCaseEnc);
63+
Charset charset = charsetCache.getCharset(lowerCaseEnc);
7564

7665
if (charset == null) {
7766
// Pre-population of the cache means this must be invalid
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.tomcat.util.buf;
18+
19+
import java.nio.charset.Charset;
20+
import java.nio.charset.CharsetDecoder;
21+
import java.nio.charset.CharsetEncoder;
22+
import java.util.Locale;
23+
import java.util.concurrent.ConcurrentHashMap;
24+
import java.util.concurrent.ConcurrentMap;
25+
26+
/**
 * Cache of {@link Charset} instances keyed by lower-case charset name or
 * alias.
 * <p>
 * Only the charsets listed in {@code INITIAL_CHARSETS} are resolved when the
 * cache is constructed. Every name in {@code LAZY_CHARSETS} is registered with
 * a placeholder and resolved through {@link Charset#forName(String)} the first
 * time it is requested. Names that are in neither list (and are not an alias
 * of a charset that has already been resolved) are reported as unknown.
 * <p>
 * Note that the placeholder has no aliases, so the aliases of a lazily loaded
 * charset only become resolvable once that charset has been requested by one
 * of the names in {@code LAZY_CHARSETS}.
 */
public class CharsetCache {

    /* Charsets that are resolved eagerly when the cache is created. */
    private static final String[] INITIAL_CHARSETS = new String[] { "iso-8859-1", "utf-8" };

    /*
     * Charsets that are registered by name only and resolved on first use.
     *
     * Tested with:
     * - Oracle JDK 8 u192
     * - OpenJDK 13 EA 4
     */
    private static final String[] LAZY_CHARSETS = new String[] {
            "big5", "big5-hkscs", "cesu-8", "euc-jp", "euc-kr", "gb18030", "gb2312", "gbk", "ibm-thai", "ibm00858",
            "ibm01140", "ibm01141", "ibm01142", "ibm01143", "ibm01144", "ibm01145", "ibm01146", "ibm01147", "ibm01148",
            "ibm01149", "ibm037", "ibm1026", "ibm1047", "ibm273", "ibm277", "ibm278", "ibm280", "ibm284", "ibm285",
            "ibm290", "ibm297", "ibm420", "ibm424", "ibm437", "ibm500", "ibm775", "ibm850", "ibm852", "ibm855",
            "ibm857", "ibm860", "ibm861", "ibm862", "ibm863", "ibm864", "ibm865", "ibm866", "ibm868", "ibm869",
            "ibm870", "ibm871", "ibm918", "iso-2022-cn", "iso-2022-jp", "iso-2022-jp-2", "iso-2022-kr", "iso-8859-13",
            "iso-8859-15", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7",
            "iso-8859-8", "iso-8859-9", "iso-8859-16", "jis_x0201", "jis_x0212-1990", "koi8-r", "koi8-u", "shift_jis",
            "tis-620", "us-ascii", "utf-16", "utf-16be", "utf-16le", "utf-32", "utf-32be", "utf-32le", "x-utf-32be-bom",
            "x-utf-32le-bom", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254",
            "windows-1255", "windows-1256", "windows-1257", "windows-1258", "windows-31j", "x-big5-hkscs-2001",
            "x-big5-solaris", "x-compound_text", "x-euc-tw", "x-ibm1006", "x-ibm1025", "x-ibm1046", "x-ibm1097",
            "x-ibm1098", "x-ibm1112", "x-ibm1122", "x-ibm1123", "x-ibm1124", "x-ibm1129", "x-ibm1166", "x-ibm1364",
            "x-ibm1381", "x-ibm1383", "x-ibm300", "x-ibm33722", "x-ibm737", "x-ibm833", "x-ibm834", "x-ibm856",
            "x-ibm874", "x-ibm875", "x-ibm921", "x-ibm922", "x-ibm930", "x-ibm933", "x-ibm935", "x-ibm937", "x-ibm939",
            "x-ibm942", "x-ibm942c", "x-ibm943", "x-ibm943c", "x-ibm948", "x-ibm949", "x-ibm949c", "x-ibm950",
            "x-ibm964", "x-ibm970", "x-iscii91", "x-iso-2022-cn-cns", "x-iso-2022-cn-gb", "x-jis0208",
            "x-jisautodetect", "x-johab", "x-ms932_0213", "x-ms950-hkscs", "x-ms950-hkscs-xp", "x-macarabic",
            "x-maccentraleurope", "x-maccroatian", "x-maccyrillic", "x-macdingbat", "x-macgreek", "x-machebrew",
            "x-maciceland", "x-macroman", "x-macromania", "x-macsymbol", "x-macthai", "x-macturkish", "x-macukraine",
            "x-pck", "x-sjis_0213", "x-utf-16le-bom", "x-euc-jp-linux", "x-eucjp-open", "x-iso-8859-11", "x-mswin-936",
            "x-windows-50220", "x-windows-50221", "x-windows-874", "x-windows-949", "x-windows-950",
            "x-windows-iso2022jp"
            };

    /* Sentinel stored for names that are known but not yet resolved. */
    private static final Charset DUMMY_CHARSET = new DummyCharset("Dummy", null);

    private final ConcurrentMap<String,Charset> cache = new ConcurrentHashMap<>();

    /**
     * Creates a cache in which the initial charsets are resolved immediately
     * and all lazy charsets are represented by a placeholder.
     */
    public CharsetCache() {
        // Pre-populate the cache
        for (String charsetName : INITIAL_CHARSETS) {
            Charset charset = Charset.forName(charsetName);
            addToCache(charsetName, charset);
        }

        for (String charsetName : LAZY_CHARSETS) {
            addToCache(charsetName, DUMMY_CHARSET);
        }
    }


    /*
     * Registers the charset under the given (already lower-case) name and
     * under the lower-case form of each of its aliases. The placeholder has no
     * aliases so it is registered under the given name only.
     */
    private void addToCache(String name, Charset charset) {
        cache.put(name, charset);
        for (String alias : charset.aliases()) {
            cache.put(alias.toLowerCase(Locale.ENGLISH), charset);
        }
    }


    /**
     * Looks up a charset by name or alias, ignoring case.
     *
     * @param charsetName The name or alias of the required charset. Must not
     *                    be {@code null}.
     *
     * @return The matching charset, or {@code null} if the name is not known
     *         to this cache or the charset is not available in this JVM
     */
    public Charset getCharset(String charsetName) {
        String lcCharsetName = charsetName.toLowerCase(Locale.ENGLISH);

        Charset result = cache.get(lcCharsetName);

        if (result == DUMMY_CHARSET) {
            // Name is known but the Charset is not in the cache
            try {
                // forName() never returns null. It signals an unavailable
                // charset by throwing UnsupportedCharsetException.
                Charset charset = Charset.forName(lcCharsetName);
                // Charset is available - populate cache entry
                addToCache(lcCharsetName, charset);
                result = charset;
            } catch (java.nio.charset.UnsupportedCharsetException e) {
                // Charset not available in this JVM - remove cache entry
                cache.remove(lcCharsetName);
                result = null;
            }
        }

        return result;
    }


    /*
     * Placeholder Charset implementation for entries that will be loaded lazily
     * into the cache. It is only ever compared by identity and never used to
     * encode or decode.
     */
    private static class DummyCharset extends Charset {

        protected DummyCharset(String canonicalName, String[] aliases) {
            super(canonicalName, aliases);
        }

        @Override
        public boolean contains(Charset cs) {
            return false;
        }

        @Override
        public CharsetDecoder newDecoder() {
            return null;
        }

        @Override
        public CharsetEncoder newEncoder() {
            return null;
        }
    }
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.tomcat.util.buf;
18+
19+
import java.nio.charset.Charset;
20+
import java.util.ArrayList;
21+
import java.util.Collections;
22+
import java.util.List;
23+
import java.util.Locale;
24+
25+
import org.junit.Assert;
26+
import org.junit.Test;
27+
28+
public class TestCharsetCache {
29+
30+
@Test
31+
public void testAllKnownCharsets() {
32+
CharsetCache cache = new CharsetCache();
33+
34+
List<String> cacheMisses = new ArrayList<>();
35+
36+
for (Charset charset: Charset.availableCharsets().values()) {
37+
if (cache.getCharset(charset.name()) == null) {
38+
cacheMisses.add(charset.name());
39+
} else {
40+
for (String alias : charset.aliases()) {
41+
if (cache.getCharset(alias) == null) {
42+
cacheMisses.add(alias);
43+
}
44+
}
45+
}
46+
}
47+
48+
if (cacheMisses.size() != 0) {
49+
StringBuilder sb = new StringBuilder();
50+
Collections.sort(cacheMisses);
51+
for (String name : cacheMisses) {
52+
if (sb.length() == 0) {
53+
sb.append('"');
54+
} else {
55+
sb.append(", \"");
56+
}
57+
sb.append(name.toLowerCase(Locale.ENGLISH));
58+
sb.append('"');
59+
}
60+
System.out.println(sb.toString());
61+
}
62+
63+
Assert.assertTrue(cacheMisses.size() == 0);
64+
}
65+
}
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.tomcat.util.buf;
18+
19+
import java.nio.charset.Charset;
20+
import java.util.HashMap;
21+
import java.util.Locale;
22+
import java.util.Map;
23+
24+
import org.junit.Test;
25+
26+
public class TestCharsetCachePerformance {
27+
28+
@Test
29+
public void testNoCsCache() throws Exception {
30+
doTest(new NoCsCache());
31+
}
32+
33+
34+
@Test
35+
public void testFullCsCache() throws Exception {
36+
doTest(new FullCsCache());
37+
}
38+
39+
40+
@Test
41+
public void testLazyCsCache() throws Exception {
42+
doTest(new LazyCsCache());
43+
}
44+
45+
46+
private void doTest(CsCache cache) throws Exception {
47+
int threadCount = 10;
48+
int iterations = 10000000;
49+
String[] lookupNames = new String[] {
50+
"ISO-8859-1", "ISO-8859-2", "ISO-8859-3", "ISO-8859-4", "ISO-8859-5" };
51+
52+
Thread[] threads = new Thread[threadCount];
53+
54+
for (int i = 0; i < threadCount; i++) {
55+
threads[i] = new TestCsCacheThread(iterations, cache, lookupNames);
56+
}
57+
58+
long startTime = System.nanoTime();
59+
60+
for (int i = 0; i < threadCount; i++) {
61+
threads[i].start();
62+
}
63+
64+
for (int i = 0; i < threadCount; i++) {
65+
threads[i].join();
66+
}
67+
68+
long endTime = System.nanoTime();
69+
70+
System.out.println(cache.getClass().getName() + ": " + (endTime - startTime) + "ns");
71+
}
72+
73+
74+
private static interface CsCache {
75+
Charset getCharset(String charsetName);
76+
}
77+
78+
79+
private static class NoCsCache implements CsCache {
80+
81+
@Override
82+
public Charset getCharset(String charsetName) {
83+
return Charset.forName(charsetName);
84+
}
85+
}
86+
87+
88+
private static class FullCsCache implements CsCache {
89+
90+
private static final Map<String,Charset> cache = new HashMap<>();
91+
92+
static {
93+
for (Charset charset: Charset.availableCharsets().values()) {
94+
cache.put(charset.name().toLowerCase(Locale.ENGLISH), charset);
95+
for (String alias : charset.aliases()) {
96+
cache.put(alias.toLowerCase(Locale.ENGLISH), charset);
97+
}
98+
}
99+
}
100+
101+
102+
@Override
103+
public Charset getCharset(String charsetName) {
104+
return cache.get(charsetName.toLowerCase(Locale.ENGLISH));
105+
}
106+
}
107+
108+
109+
private static class LazyCsCache implements CsCache {
110+
111+
private CharsetCache cache = new CharsetCache();
112+
113+
@Override
114+
public Charset getCharset(String charsetName) {
115+
return cache.getCharset(charsetName);
116+
}
117+
}
118+
119+
120+
private static class TestCsCacheThread extends Thread {
121+
122+
private final int iterations;
123+
private final CsCache cache;
124+
private final String[] lookupNames;
125+
private final int lookupNamesCount;
126+
127+
public TestCsCacheThread(int iterations, CsCache cache, String[] lookupNames) {
128+
this.iterations = iterations;
129+
this.cache = cache;
130+
this.lookupNames = lookupNames;
131+
this.lookupNamesCount = lookupNames.length;
132+
}
133+
134+
@Override
135+
public void run() {
136+
for (int i = 0; i < iterations; i++) {
137+
cache.getCharset(lookupNames[i % lookupNamesCount]);
138+
}
139+
}
140+
}
141+
}

webapps/docs/changelog.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@
9292
<bug>63246</bug>: Fix a potential <code>NullPointerException</code> when
9393
calling <code>AsyncContext.dispatch()</code>. (markt)
9494
</fix>
95+
<fix>
96+
<bug>63235</bug>: Refactor Charset cache to reduce start time. (markt)
97+
</fix>
9598
</changelog>
9699
</subsection>
97100
<subsection name="Coyote">

0 commit comments

Comments
 (0)