Libav 0.7.1
libavformat/id3v2.c
Go to the documentation of this file.
00001 /*
00002  * ID3v2 header parser
00003  * Copyright (c) 2003 Fabrice Bellard
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00022 #include "id3v2.h"
00023 #include "id3v1.h"
00024 #include "libavutil/avstring.h"
00025 #include "libavutil/intreadwrite.h"
00026 #include "libavutil/dict.h"
00027 #include "avio_internal.h"
00028 
/**
 * Check whether a buffer starts with something that looks like an ID3v2
 * header.
 *
 * @param buf   buffer of which bytes 0..4 and 6..9 are inspected
 * @param magic identifier to look for; only its first three characters are used
 * @return 1 if the first three bytes equal the magic, neither of the two
 *         version bytes is 0xff and the top bit of each of the four size
 *         bytes is clear; 0 otherwise
 */
int ff_id3v2_match(const uint8_t *buf, const char *magic)
{
    int i;

    /* Identifier bytes must equal the expected magic. */
    for (i = 0; i < 3; i++)
        if (buf[i] != magic[i])
            return 0;

    /* Neither version byte may be 0xff. */
    if (buf[3] == 0xff || buf[4] == 0xff)
        return 0;

    /* Each size byte carries 7 bits of payload; the top bit must be clear. */
    for (i = 6; i <= 9; i++)
        if (buf[i] & 0x80)
            return 0;

    return 1;
}
00041 
00042 int ff_id3v2_tag_len(const uint8_t * buf)
00043 {
00044     int len = ((buf[6] & 0x7f) << 21) +
00045               ((buf[7] & 0x7f) << 14) +
00046               ((buf[8] & 0x7f) << 7) +
00047                (buf[9] & 0x7f) +
00048               ID3v2_HEADER_SIZE;
00049     if (buf[5] & 0x10)
00050         len += ID3v2_HEADER_SIZE;
00051     return len;
00052 }
00053 
00054 static unsigned int get_size(AVIOContext *s, int len)
00055 {
00056     int v = 0;
00057     while (len--)
00058         v = (v << 7) + (avio_r8(s) & 0x7F);
00059     return v;
00060 }
00061 
00062 static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const char *key)
00063 {
00064     char *q, dst[512];
00065     const char *val = NULL;
00066     int len, dstlen = sizeof(dst) - 1;
00067     unsigned genre;
00068     unsigned int (*get)(AVIOContext*) = avio_rb16;
00069 
00070     dst[0] = 0;
00071     if (taglen < 1)
00072         return;
00073 
00074     taglen--; /* account for encoding type byte */
00075 
00076     switch (avio_r8(pb)) { /* encoding type */
00077 
00078     case ID3v2_ENCODING_ISO8859:
00079         q = dst;
00080         while (taglen-- && q - dst < dstlen - 7) {
00081             uint8_t tmp;
00082             PUT_UTF8(avio_r8(pb), tmp, *q++ = tmp;)
00083         }
00084         *q = 0;
00085         break;
00086 
00087     case ID3v2_ENCODING_UTF16BOM:
00088         taglen -= 2;
00089         switch (avio_rb16(pb)) {
00090         case 0xfffe:
00091             get = avio_rl16;
00092         case 0xfeff:
00093             break;
00094         default:
00095             av_log(s, AV_LOG_ERROR, "Incorrect BOM value in tag %s.\n", key);
00096             return;
00097         }
00098         // fall-through
00099 
00100     case ID3v2_ENCODING_UTF16BE:
00101         q = dst;
00102         while (taglen > 1 && q - dst < dstlen - 7) {
00103             uint32_t ch;
00104             uint8_t tmp;
00105 
00106             GET_UTF16(ch, ((taglen -= 2) >= 0 ? get(pb) : 0), break;)
00107             PUT_UTF8(ch, tmp, *q++ = tmp;)
00108         }
00109         *q = 0;
00110         break;
00111 
00112     case ID3v2_ENCODING_UTF8:
00113         len = FFMIN(taglen, dstlen);
00114         avio_read(pb, dst, len);
00115         dst[len] = 0;
00116         break;
00117     default:
00118         av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s.\n", key);
00119     }
00120 
00121     if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
00122         && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1)
00123         && genre <= ID3v1_GENRE_MAX)
00124         val = ff_id3v1_genre_str[genre];
00125     else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
00126         /* dst now contains two 0-terminated strings */
00127         dst[dstlen] = 0;
00128         len = strlen(dst);
00129         key = dst;
00130         val = dst + FFMIN(len + 1, dstlen);
00131     }
00132     else if (*dst)
00133         val = dst;
00134 
00135     if (val)
00136         av_dict_set(&s->metadata, key, val, AV_DICT_DONT_OVERWRITE);
00137 }
00138 
/**
 * Tell whether a string consists solely of ASCII decimal digits.
 *
 * @param str NUL-terminated string to examine
 * @return 1 if every character is in '0'..'9' (an empty string also
 *         yields 1), 0 as soon as any other character is found
 */
static int is_number(const char *str)
{
    const char *p;

    for (p = str; *p; p++)
        if (*p < '0' || *p > '9')
            return 0;

    return 1;
}
00144 
00145 static AVDictionaryEntry* get_date_tag(AVDictionary *m, const char *tag)
00146 {
00147     AVDictionaryEntry *t;
00148     if ((t = av_dict_get(m, tag, NULL, AV_DICT_MATCH_CASE)) &&
00149         strlen(t->value) == 4 && is_number(t->value))
00150         return t;
00151     return NULL;
00152 }
00153 
00154 static void merge_date(AVDictionary **m)
00155 {
00156     AVDictionaryEntry *t;
00157     char date[17] = {0};      // YYYY-MM-DD hh:mm
00158 
00159     if (!(t = get_date_tag(*m, "TYER")) &&
00160         !(t = get_date_tag(*m, "TYE")))
00161         return;
00162     av_strlcpy(date, t->value, 5);
00163     av_dict_set(m, "TYER", NULL, 0);
00164     av_dict_set(m, "TYE",  NULL, 0);
00165 
00166     if (!(t = get_date_tag(*m, "TDAT")) &&
00167         !(t = get_date_tag(*m, "TDA")))
00168         goto finish;
00169     snprintf(date + 4, sizeof(date) - 4, "-%.2s-%.2s", t->value + 2, t->value);
00170     av_dict_set(m, "TDAT", NULL, 0);
00171     av_dict_set(m, "TDA",  NULL, 0);
00172 
00173     if (!(t = get_date_tag(*m, "TIME")) &&
00174         !(t = get_date_tag(*m, "TIM")))
00175         goto finish;
00176     snprintf(date + 10, sizeof(date) - 10, " %.2s:%.2s", t->value, t->value + 2);
00177     av_dict_set(m, "TIME", NULL, 0);
00178     av_dict_set(m, "TIM",  NULL, 0);
00179 
00180 finish:
00181     if (date[0])
00182         av_dict_set(m, "date", date, 0);
00183 }
00184 
00185 static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags)
00186 {
00187     int isv34, tlen, unsync;
00188     char tag[5];
00189     int64_t next, end = avio_tell(s->pb) + len;
00190     int taghdrlen;
00191     const char *reason = NULL;
00192     AVIOContext pb;
00193     unsigned char *buffer = NULL;
00194     int buffer_size = 0;
00195 
00196     switch (version) {
00197     case 2:
00198         if (flags & 0x40) {
00199             reason = "compression";
00200             goto error;
00201         }
00202         isv34 = 0;
00203         taghdrlen = 6;
00204         break;
00205 
00206     case 3:
00207     case 4:
00208         isv34 = 1;
00209         taghdrlen = 10;
00210         break;
00211 
00212     default:
00213         reason = "version";
00214         goto error;
00215     }
00216 
00217     unsync = flags & 0x80;
00218 
00219     if (isv34 && flags & 0x40) /* Extended header present, just skip over it */
00220         avio_skip(s->pb, get_size(s->pb, 4));
00221 
00222     while (len >= taghdrlen) {
00223         unsigned int tflags = 0;
00224         int tunsync = 0;
00225 
00226         if (isv34) {
00227             avio_read(s->pb, tag, 4);
00228             tag[4] = 0;
00229             if(version==3){
00230                 tlen = avio_rb32(s->pb);
00231             }else
00232                 tlen = get_size(s->pb, 4);
00233             tflags = avio_rb16(s->pb);
00234             tunsync = tflags & ID3v2_FLAG_UNSYNCH;
00235         } else {
00236             avio_read(s->pb, tag, 3);
00237             tag[3] = 0;
00238             tlen = avio_rb24(s->pb);
00239         }
00240         if (tlen <= 0 || tlen > len - taghdrlen) {
00241             av_log(s, AV_LOG_WARNING, "Invalid size in frame %s, skipping the rest of tag.\n", tag);
00242             break;
00243         }
00244         len -= taghdrlen + tlen;
00245         next = avio_tell(s->pb) + tlen;
00246 
00247         if (tflags & ID3v2_FLAG_DATALEN) {
00248             avio_rb32(s->pb);
00249             tlen -= 4;
00250         }
00251 
00252         if (tflags & (ID3v2_FLAG_ENCRYPTION | ID3v2_FLAG_COMPRESSION)) {
00253             av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag);
00254             avio_skip(s->pb, tlen);
00255         } else if (tag[0] == 'T') {
00256             if (unsync || tunsync) {
00257                 int i, j;
00258                 av_fast_malloc(&buffer, &buffer_size, tlen);
00259                 if (!buffer) {
00260                     av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", tlen);
00261                     goto seek;
00262                 }
00263                 for (i = 0, j = 0; i < tlen; i++, j++) {
00264                     buffer[j] = avio_r8(s->pb);
00265                     if (j > 0 && !buffer[j] && buffer[j - 1] == 0xff) {
00266                         /* Unsynchronised byte, skip it */
00267                         j--;
00268                     }
00269                 }
00270                 ffio_init_context(&pb, buffer, j, 0, NULL, NULL, NULL, NULL);
00271                 read_ttag(s, &pb, j, tag);
00272             } else {
00273                 read_ttag(s, s->pb, tlen, tag);
00274             }
00275         }
00276         else if (!tag[0]) {
00277             if (tag[1])
00278                 av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding");
00279             avio_skip(s->pb, tlen);
00280             break;
00281         }
00282         /* Skip to end of tag */
00283 seek:
00284         avio_seek(s->pb, next, SEEK_SET);
00285     }
00286 
00287     if (version == 4 && flags & 0x10) /* Footer preset, always 10 bytes, skip over it */
00288         end += 10;
00289 
00290   error:
00291     if (reason)
00292         av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason);
00293     avio_seek(s->pb, end, SEEK_SET);
00294     av_free(buffer);
00295     return;
00296 }
00297 
00298 void ff_id3v2_read(AVFormatContext *s, const char *magic)
00299 {
00300     int len, ret;
00301     uint8_t buf[ID3v2_HEADER_SIZE];
00302     int     found_header;
00303     int64_t off;
00304 
00305     do {
00306         /* save the current offset in case there's nothing to read/skip */
00307         off = avio_tell(s->pb);
00308         ret = avio_read(s->pb, buf, ID3v2_HEADER_SIZE);
00309         if (ret != ID3v2_HEADER_SIZE)
00310             break;
00311             found_header = ff_id3v2_match(buf, magic);
00312             if (found_header) {
00313             /* parse ID3v2 header */
00314             len = ((buf[6] & 0x7f) << 21) |
00315                   ((buf[7] & 0x7f) << 14) |
00316                   ((buf[8] & 0x7f) << 7) |
00317                    (buf[9] & 0x7f);
00318             ff_id3v2_parse(s, len, buf[3], buf[5]);
00319         } else {
00320             avio_seek(s->pb, off, SEEK_SET);
00321         }
00322     } while (found_header);
00323     ff_metadata_conv(&s->metadata, NULL, ff_id3v2_34_metadata_conv);
00324     ff_metadata_conv(&s->metadata, NULL, ff_id3v2_2_metadata_conv);
00325     ff_metadata_conv(&s->metadata, NULL, ff_id3v2_4_metadata_conv);
00326     merge_date(&s->metadata);
00327 }
00328 
00329 const AVMetadataConv ff_id3v2_34_metadata_conv[] = {
00330     { "TALB", "album"},
00331     { "TCOM", "composer"},
00332     { "TCON", "genre"},
00333     { "TCOP", "copyright"},
00334     { "TENC", "encoded_by"},
00335     { "TIT2", "title"},
00336     { "TLAN", "language"},
00337     { "TPE1", "artist"},
00338     { "TPE2", "album_artist"},
00339     { "TPE3", "performer"},
00340     { "TPOS", "disc"},
00341     { "TPUB", "publisher"},
00342     { "TRCK", "track"},
00343     { "TSSE", "encoder"},
00344     { 0 }
00345 };
00346 
00347 const AVMetadataConv ff_id3v2_4_metadata_conv[] = {
00348     { "TDRL", "date"},
00349     { "TDRC", "date"},
00350     { "TDEN", "creation_time"},
00351     { "TSOA", "album-sort"},
00352     { "TSOP", "artist-sort"},
00353     { "TSOT", "title-sort"},
00354     { 0 }
00355 };
00356 
00357 const AVMetadataConv ff_id3v2_2_metadata_conv[] = {
00358     { "TAL",  "album"},
00359     { "TCO",  "genre"},
00360     { "TT2",  "title"},
00361     { "TEN",  "encoded_by"},
00362     { "TP1",  "artist"},
00363     { "TP2",  "album_artist"},
00364     { "TP3",  "performer"},
00365     { "TRK",  "track"},
00366     { 0 }
00367 };
00368 
00369 
/**
 * List of four-character text frame IDs. Every entry fills its 4-byte slot
 * completely, so entries are NOT NUL-terminated; the list ends with an
 * all-zero entry. Not referenced elsewhere in this file.
 */
const char ff_id3v2_tags[][4] = {
    "TALB", "TBPM", "TCOM", "TCON",
    "TCOP", "TDLY", "TENC", "TEXT",
    "TFLT", "TIT1", "TIT2", "TIT3",
    "TKEY", "TLAN", "TLEN", "TMED",
    "TOAL", "TOFN", "TOLY", "TOPE",
    "TOWN", "TPE1", "TPE2", "TPE3",
    "TPE4", "TPOS", "TPUB", "TRCK",
    "TRSN", "TRSO", "TSRC", "TSSE",
    { 0 },
};
00377 
/**
 * Second list of four-character text frame IDs, in the same layout:
 * entries are not NUL-terminated and the list ends with an all-zero entry.
 * Not referenced elsewhere in this file.
 */
const char ff_id3v2_4_tags[][4] = {
    "TDEN", "TDOR", "TDRC", "TDRL",
    "TDTG", "TIPL", "TMCL", "TMOO",
    "TPRO", "TSOA", "TSOP", "TSOT",
    "TSST",
    { 0 },
};
00383 
/**
 * Third list of four-character text frame IDs, in the same layout:
 * entries are not NUL-terminated and the list ends with an all-zero entry.
 * Not referenced elsewhere in this file.
 */
const char ff_id3v2_3_tags[][4] = {
    "TDAT", "TIME", "TORY",
    "TRDA", "TSIZ", "TYER",
    { 0 },
};
00387 };