001    /*
002     *    GeoAPI - Java interfaces for OGC/ISO standards
003     *    http://www.geoapi.org
004     *
005     *    Copyright (C) 2007-2012 Open Geospatial Consortium, Inc.
006     *    All Rights Reserved. http://www.opengeospatial.org/ogc/legal
007     *
008     *    Permission to use, copy, and modify this software and its documentation, with
009     *    or without modification, for any purpose and without fee or royalty is hereby
010     *    granted, provided that you include the following on ALL copies of the software
011     *    and documentation or portions thereof, including modifications, that you make:
012     *
013     *    1. The full text of this NOTICE in a location viewable to users of the
014     *       redistributed or derivative work.
015     *    2. Notice of any changes or modifications to the OGC files, including the
016     *       date changes were made.
017     *
018     *    THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT HOLDERS MAKE
019     *    NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
020     *    TO, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT
021     *    THE USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY
022     *    PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
023     *
024     *    COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL OR
025     *    CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR DOCUMENTATION.
026     *
027     *    The name and trademarks of copyright holders may NOT be used in advertising or
028     *    publicity pertaining to the software without specific, written prior permission.
029     *    Title to copyright in this software and any associated documentation will at all
030     *    times remain with copyright holders.
031     */
032    package org.opengis.metadata.identification;
033    
034    import java.nio.charset.Charset;
035    import java.nio.charset.UnsupportedCharsetException;
036    import java.util.List;
037    import java.util.ArrayList;
038    
039    import org.opengis.util.CodeList;
040    import org.opengis.annotation.UML;
041    
042    import static org.opengis.annotation.Specification.ISO_19115;
043    import static org.opengis.annotation.Obligation.CONDITIONAL;
044    
045    
046    /**
047     * Name of the character coding standard used for the resource.
048     *
049     * @author  Ely Conn (Leica Geosystems Geospatial Imaging, LLC)
050     * @version 3.0
051     * @since   2.1
052     */
053    @UML(identifier="MD_CharacterSetCode", specification=ISO_19115)
054    public final class CharacterSet extends CodeList<CharacterSet> {
055        /**
056         * Serial number for compatibility with different versions.
057         */
058        private static final long serialVersionUID = -4726629268456735927L;
059    
060        /**
061         * List of all enumerations of this type.
062         * Must be declared before any enum declaration.
063         */
064        private static final List<CharacterSet> VALUES = new ArrayList<CharacterSet>(29);
065    
066        /**
067         * 16-bit fixed size Universal Character Set, based on ISO/IEC 10646.
068         */
069        @UML(identifier="ucs2", obligation=CONDITIONAL, specification=ISO_19115)
070        public static final CharacterSet UCS_2 = new CharacterSet("UCS_2", "UCS-2");
071    
072        /**
073         * 32-bit fixed size Universal Character Set, based on ISO/IEC 10646.
074         */
075        @UML(identifier="ucs4", obligation=CONDITIONAL, specification=ISO_19115)
076        public static final CharacterSet UCS_4 = new CharacterSet("UCS_4", "UCS-4");
077    
078        /**
079         * 7-bit variable size UCS Transfer Format, based on ISO/IEC 10646.
080         */
081        @UML(identifier="utf7", obligation=CONDITIONAL, specification=ISO_19115)
082        public static final CharacterSet UTF_7 = new CharacterSet("UTF_7", "UTF-7");
083    
084        /**
085         * 8-bit variable size UCS Transfer Format, based on ISO/IEC 10646.
086         */
087        @UML(identifier="utf8", obligation=CONDITIONAL, specification=ISO_19115)
088        public static final CharacterSet UTF_8 = new CharacterSet("UTF_8", "UTF-8");
089    
090        /**
091         * 16-bit variable size UCS Transfer Format, based on ISO/IEC 10646.
092         */
093        @UML(identifier="utf16", obligation=CONDITIONAL, specification=ISO_19115)
094        public static final CharacterSet UTF_16 = new CharacterSet("UTF_16", "UTF-16");
095    
096        /**
097         * ISO/IEC 8859-1, Information technology.
098         * 8-bit single-byte coded graphic character sets - Part 1: Latin alphabet No. 1.
099         */
100        @UML(identifier="8859part1", obligation=CONDITIONAL, specification=ISO_19115)
101        public static final CharacterSet ISO_8859_1 = new CharacterSet("ISO_8859_1", "ISO-8859-1");
102    
103        /**
104         * ISO/IEC 8859-2, Information technology.
105         * 8-bit single-byte coded graphic character sets - Part 2: Latin alphabet No. 2.
106         */
107        @UML(identifier="8859part2", obligation=CONDITIONAL, specification=ISO_19115)
108        public static final CharacterSet ISO_8859_2 = new CharacterSet("ISO_8859_2", "ISO-8859-2");
109    
110        /**
111         * ISO/IEC 8859-3, Information technology.
112         * 8-bit single-byte coded graphic character sets - Part 3: Latin alphabet No. 3.
113         */
114        @UML(identifier="8859part3", obligation=CONDITIONAL, specification=ISO_19115)
115        public static final CharacterSet ISO_8859_3 = new CharacterSet("ISO_8859_3", "ISO-8859-3");
116    
117        /**
118         * ISO/IEC 8859-4, Information technology.
119         * 8-bit single-byte coded graphic character sets - Part 4: Latin alphabet No. 4.
120         */
121        @UML(identifier="8859part4", obligation=CONDITIONAL, specification=ISO_19115)
122        public static final CharacterSet ISO_8859_4 = new CharacterSet("ISO_8859_4", "ISO-8859-4");
123    
124        /**
125         * ISO/IEC 8859-5, Information technology.
126         * 8-bit single-byte coded graphic character sets - Part 5: Latin/Cyrillic alphabet.
127         */
128        @UML(identifier="8859part5", obligation=CONDITIONAL, specification=ISO_19115)
129        public static final CharacterSet ISO_8859_5 = new CharacterSet("ISO_8859_5", "ISO-8859-5");
130    
131        /**
132         * ISO/IEC 8859-6, Information technology.
133         * 8-bit single-byte coded graphic character sets - Part 6: Latin/Arabic alphabet.
134         */
135        @UML(identifier="8859part6", obligation=CONDITIONAL, specification=ISO_19115)
136        public static final CharacterSet ISO_8859_6 = new CharacterSet("ISO_8859_6", "ISO-8859-6");
137    
138        /**
139         * ISO/IEC 8859-7, Information technology.
140         * 8-bit single-byte coded graphic character sets - Part 7: Latin/Greek alphabet.
141         */
142        @UML(identifier="8859part7", obligation=CONDITIONAL, specification=ISO_19115)
143        public static final CharacterSet ISO_8859_7 = new CharacterSet("ISO_8859_7", "ISO-8859-7");
144    
145        /**
146         * ISO/IEC 8859-8, Information technology.
147         * 8-bit single-byte coded graphic character sets - Part 8: Latin/Hebrew alphabet.
148         */
149        @UML(identifier="8859part8", obligation=CONDITIONAL, specification=ISO_19115)
150        public static final CharacterSet ISO_8859_8 = new CharacterSet("ISO_8859_8", "ISO-8859-8");
151    
152        /**
153         * ISO/IEC 8859-9, Information technology.
154         * 8-bit single-byte coded graphic character sets - Part 9: Latin alphabet No. 5.
155         */
156        @UML(identifier="8859part9", obligation=CONDITIONAL, specification=ISO_19115)
157        public static final CharacterSet ISO_8859_9 = new CharacterSet("ISO_8859_9", "ISO-8859-9");
158    
159        /**
160         * ISO/IEC 8859-10, Information technology.
161         * 8-bit single-byte coded graphic character sets - Part 10: Latin alphabet No. 6.
162         */
163        @UML(identifier="8859part10", obligation=CONDITIONAL, specification=ISO_19115)
164        public static final CharacterSet ISO_8859_10 = new CharacterSet("ISO_8859_10", "ISO-8859-10");
165    
166        /**
167         * ISO/IEC 8859-11, Information technology.
168         * 8-bit single-byte coded graphic character sets - Part 11: Latin/Thai alphabet.
169         */
170        @UML(identifier="8859part11", obligation=CONDITIONAL, specification=ISO_19115)
171        public static final CharacterSet ISO_8859_11 = new CharacterSet("ISO_8859_11", "ISO-8859-11");
172    
173        /**
174         * A future ISO/IEC 8-bit single-byte coded graphic character set.
175         */
176        @UML(identifier="8859part12", obligation=CONDITIONAL, specification=ISO_19115)
177        public static final CharacterSet ISO_8859_12 = new CharacterSet("ISO_8859_12", "ISO-8859-12");
178    
179        /**
180         * ISO/IEC 8859-13, Information technology.
181         * 8-bit single-byte coded graphic character sets - Part 13: Latin alphabet No. 7.
182         */
183        @UML(identifier="8859part13", obligation=CONDITIONAL, specification=ISO_19115)
184        public static final CharacterSet ISO_8859_13 = new CharacterSet("ISO_8859_13", "ISO-8859-13");
185    
186        /**
187         * ISO/IEC 8859-14, Information technology.
188         * 8-bit single-byte coded graphic character sets - Part 14: Latin alphabet No. 8 (Celtic).
189         */
190        @UML(identifier="8859part14", obligation=CONDITIONAL, specification=ISO_19115)
191        public static final CharacterSet ISO_8859_14 = new CharacterSet("ISO_8859_14", "ISO-8859-14");
192    
193        /**
194         * ISO/IEC 8859-15, Information technology.
195         * 8-bit single-byte coded graphic character sets - Part 15: Latin alphabet No. 9.
196         */
197        @UML(identifier="8859part15", obligation=CONDITIONAL, specification=ISO_19115)
198        public static final CharacterSet ISO_8859_15 = new CharacterSet("ISO_8859_15", "ISO-8859-15");
199    
200        /**
201         * ISO/IEC 8859-16, Information technology.
202         * 8-bit single-byte coded graphic character sets - Part 16: Latin alphabet No. 10.
203         */
204        @UML(identifier="8859part16", obligation=CONDITIONAL, specification=ISO_19115)
205        public static final CharacterSet ISO_8859_16 = new CharacterSet("ISO_8859_16", "ISO-8859-16");
206    
207        /**
208         * Japanese code set used for electronic transmission.
209         */
210        @UML(identifier="jis", obligation=CONDITIONAL, specification=ISO_19115)
211        public static final CharacterSet JIS = new CharacterSet("JIS", "JIS_X0201");
212    
213        /**
214         * Japanese code set used on MS-DOS based machines.
215         */
216        @UML(identifier="shiftJIS", obligation=CONDITIONAL, specification=ISO_19115)
217        public static final CharacterSet SHIFT_JIS = new CharacterSet("SHIFT_JIS", "Shift_JIS");
218    
219        /**
220         * Japanese code set used on UNIX based machines.
221         */
222        @UML(identifier="eucJP", obligation=CONDITIONAL, specification=ISO_19115)
223        public static final CharacterSet EUC_JP = new CharacterSet("EUC_JP", "EUC-JP");
224    
225        /**
226         * United States ASCII code set (ISO 646 US).
227         */
228        @UML(identifier="usAscii", obligation=CONDITIONAL, specification=ISO_19115)
229        public static final CharacterSet US_ASCII = new CharacterSet("US_ASCII", "US-ASCII");
230    
231        /**
232         * IBM mainframe code set.
233         */
234        @UML(identifier="ebcdic", obligation=CONDITIONAL, specification=ISO_19115)
235        public static final CharacterSet EBCDIC = new CharacterSet("EBCDIC", null);
236    
237        /**
238         * Korean code set.
239         */
240        @UML(identifier="eucKR", obligation=CONDITIONAL, specification=ISO_19115)
241        public static final CharacterSet EUC_KR = new CharacterSet("EUC_KR", "EUC-KR");
242    
243        /**
244         * Traditional Chinese code set used in Taiwan, Hong Kong, and other areas.
245         */
246        @UML(identifier="big5", obligation=CONDITIONAL, specification=ISO_19115)
247        public static final CharacterSet BIG_5 = new CharacterSet("BIG_5", "Big5");
248    
249        /**
250         * Simplified Chinese code set.
251         */
252        @UML(identifier="GB2312", obligation=CONDITIONAL, specification=ISO_19115)
253        public static final CharacterSet GB2312 = new CharacterSet("GB2312", "GB2312");
254    
255        /**
256         * The Java {@link Charset} name (never {@code null}).
257         */
258        private final String charset;
259    
260        /**
261         * Constructs an enum with the given name. The new enum is
262         * automatically added to the list returned by {@link #values}.
263         *
264         * @param name The enum name. This name must not be in use by an other enum of this type.
265         * @param charset The Java {@link Charset} name, or {@code null} if none.
266         */
267        private CharacterSet(final String name, final String charset) {
268            super(name, VALUES);
269            this.charset = (charset != null) ? charset : name;
270        }
271    
272        /**
273         * Constructs an enum with identical name and charset.
274         * This is needed for {@link CodeList#valueOf(Class, String)} reflection.
275         */
276        private CharacterSet(final String name) {
277            this(name, name);
278        }
279    
280        /**
281         * Converts the Character Set to a java Charset, if it can.
282         *
283         * @return The Java Charset.
284         * @throws UnsupportedCharsetException If no support for the charset is available.
285         *
286         * @see <A HREF="http://download.oracle.com/javase/6/docs/technotes/guides/intl/encoding.doc.html">Supported encodings</A>
287         */
288        public Charset toCharset() throws UnsupportedCharsetException {
289            return Charset.forName(charset);
290        }
291    
292        /**
293         * Returns all the names of this code. The returned array contains the
294         * following elements, with duplicated values and null values removed:
295         * <p>
296         * <ul>
297         *   <li>The programmatic {@linkplain #name() name}</li>
298         *   <li>The UML {@linkplain #identifier() identifier}</li>
299         *   <li>The {@linkplain #toCharset() charset} name</li>
300         * </ul>
301         * <p>
302         * Those names are typically equal except for the case (programmatic names are upper case
303         * while UML names are lower case) and special characters like {@code '-'}.
304         *
305         * @return All names of this code constant. This array is never null and never empty.
306         *
307         * @since 2.3
308         */
309        @Override
310        public String[] names() {
311            final String name = name();
312            final String charset = this.charset;
313            if (charset.equals(name)) {
314                return super.names();
315            }
316            final String identifier = identifier();
317            if (identifier != null && !identifier.equals(name)) {
318                return new String[] {name, identifier, charset};
319            } else {
320                return new String[] {name, charset};
321            }
322        }
323    
324        /**
325         * Returns the list of {@code CharacterSet}s.
326         *
327         * @return The list of codes declared in the current JVM.
328         */
329        public static CharacterSet[] values() {
330            synchronized (VALUES) {
331                return VALUES.toArray(new CharacterSet[VALUES.size()]);
332            }
333        }
334    
335        /**
336         * Returns the list of enumerations of the same kind than this enum.
337         */
338        @Override
339        public CharacterSet[] family() {
340            return values();
341        }
342    
343        /**
344         * Returns the character set that matches the given string, or returns a
345         * new one if none match it. More specifically, this methods returns the first instance for
346         * which <code>{@linkplain #name() name()}.{@linkplain String#equals equals}(code)</code>
347         * returns {@code true}. If no existing instance is found, then a new one is created for
348         * the given name.
349         *
350         * @param code The name of the code to fetch or to create.
351         * @return A code matching the given name.
352         */
353        public static CharacterSet valueOf(String code) {
354            return valueOf(CharacterSet.class, code);
355        }
356    }