Diffstat (limited to 'channels/console_video.c')
-rw-r--r-- | channels/console_video.c | 3280 |
1 file changed, 3280 insertions, 0 deletions
diff --git a/channels/console_video.c b/channels/console_video.c new file mode 100644 index 000000000..429319130 --- /dev/null +++ b/channels/console_video.c @@ -0,0 +1,3280 @@ +/* + * Experimental support for video sessions. We use SDL for rendering, ffmpeg + * as the codec library for encoding and decoding, and Video4Linux and X11 + * to generate the local video stream. + * + * If one of these pieces is not available, either at compile time or at + * runtime, we do our best to run without it. Of course, no codec library + * means we can only deal with raw data, no SDL means we cannot do rendering, + * no V4L or X11 means we cannot generate data (but in principle we could + * stream from or record to a file). + * + * We need a recent (2007.07.12 or newer) version of ffmpeg to avoid warnings. + * Older versions might give 'deprecated' messages during compilation, + * thus not compiling in AST_DEVMODE, or don't have swscale, in which case + * you can try to compile #defining OLD_FFMPEG here. + * + * $Revision$ + */ + +//#define DROP_PACKETS 5 /* if set, drop this % of video packets */ +//#define OLD_FFMPEG 1 /* set for old ffmpeg with no swscale */ + + +/* +The code is structured as follows. + +When a new console channel is created, we call console_video_start() +to initialize SDL, the source, and the encoder/ decoder for the +formats in use (XXX the latter two should be done later, once the +codec negotiation is complete). Also, a thread is created to handle +the video source and generate frames. + +While communication is on, the local source is generated by the +video thread, which wakes up periodically, generates frames and +enqueues them in chan->readq. Incoming rtp frames are passed to +console_write_video(), decoded and passed to SDL for display. + +For as unfortunate and confusing as it can be, we need to deal with a +number of different video representations (size, codec/pixel format, +codec parameters), as follows: + + loc_src is the data coming from the camera/X11/etc. + The format is typically constrained by the video source. + + enc_in is the input required by the encoder. + Typically constrained in size by the encoder type. + + enc_out is the bitstream transmitted over RTP. + Typically negotiated while the call is established. + + loc_dpy is the format used to display the local video source. + Depending on user preferences this can have the same size as + loc_src_fmt, or enc_in_fmt, or thumbnail size (e.g. PiP output) + + dec_in is the incoming RTP bitstream. Negotiated + during call establishment, it is not necessarily the same as + enc_in_fmt + + dec_out the output of the decoder. + The format is whatever the other side sends, and the + buffer is allocated by avcodec_decode_... so we only + copy the data here. + + rem_dpy the format used to display the remote stream + +We store the format info together with the buffer storing the data. +As a future optimization, a format/buffer may reference another one +if the formats are equivalent. This will save some unnecessary format +conversion. + + +In order to handle video you need to add to sip.conf (and presumably +iax.conf too) the following: + + [general](+) + videosupport=yes + allow=h263 ; this or other video formats + allow=h263p ; this or other video formats + + */ + +/* + * Codecs are absolutely necessary or we cannot do anything. + * In principle SDL is optional too (used for rendering only, but we + * could still source data withouth it), however at the moment it is required. 
+ */ +#if defined(HAVE_FFMPEG) && defined(HAVE_SDL) + +#ifdef HAVE_X11 +#include <X11/Xlib.h> /* this should be conditional */ +#endif + +#include <ffmpeg/avcodec.h> +#ifndef OLD_FFMPEG +#include <ffmpeg/swscale.h> /* requires a recent ffmpeg */ +#endif + +#include <SDL/SDL.h> +#ifdef HAVE_SDL_IMAGE +#include <SDL/SDL_image.h> /* for loading images */ +#endif +#ifdef HAVE_SDL_TTF +#include <SDL/SDL_ttf.h> /* render text on sdl surfaces */ +#endif + +/* + * In many places we use buffers to store the raw frames (but not only), + * so here is a structure to keep all the info. data = NULL means the + * structure is not initialized, so the other fields are invalid. + * size = 0 means the buffer is not malloc'ed so we don't have to free it. + */ +struct fbuf_t { /* frame buffers, dynamically allocated */ + uint8_t *data; /* memory, malloced if size > 0, just reference + * otherwise */ + int size; /* total size in bytes */ + int used; /* space used so far */ + int ebit; /* bits to ignore at the end */ + int x; /* origin, if necessary */ + int y; + int w; /* size */ + int h; + int pix_fmt; +}; + +struct video_codec_desc; /* forward declaration */ +/* + * Descriptor of the local source, made of the following pieces: + * + configuration info (geometry, device name, fps...). These are read + * from the config file and copied here before calling video_out_init(); + * + the frame buffer (buf) and source pixel format, allocated at init time; + * + the encoding and RTP info, including timestamps to generate + * frames at the correct rate; + * + source-specific info, i.e. fd for /dev/video, dpy-image for x11, etc, + * filled in by video_open + * NOTE: loc_src.data == NULL means the rest of the struct is invalid, and + * the video source is not available. + */ +struct video_out_desc { + /* video device support. + * videodevice and geometry are read from the config file. + * At the right time we try to open it and allocate a buffer. + * If we are successful, webcam_bufsize > 0 and we can read. + */ + /* all the following is config file info copied from the parent */ + char videodevice[64]; + int fps; + int bitrate; + int qmin; + + int sendvideo; + + struct fbuf_t loc_src; /* local source buffer, allocated in video_open() */ + struct fbuf_t enc_in; /* encoder input buffer, allocated in video_out_init() */ + struct fbuf_t enc_out; /* encoder output buffer, allocated in video_out_init() */ + struct fbuf_t loc_dpy; /* display source buffer, no buffer (managed by SDL in bmp[1]) */ + struct fbuf_t keypad_dpy; /* keypad source buffer, XXX */ + + struct video_codec_desc *enc; /* encoder */ + AVCodecContext *enc_ctx; /* encoding context */ + AVCodec *codec; + AVFrame *frame; /* The initial part is an AVPicture */ + int mtu; + struct timeval last_frame; /* when we read the last frame ? */ + + /* device specific info */ + int fd; /* file descriptor, for webcam */ +#ifdef HAVE_X11 + Display *dpy; /* x11 grabber info */ + XImage *image; + int screen_width; /* width of X screen */ + int screen_height; /* height of X screen */ +#endif +}; + +/* + * Descriptor for the incoming stream, with a buffer for the bitstream + * extracted by the RTP packets, RTP reassembly info, and a frame buffer + * for the decoded frame (buf). + * and store the result in a suitable frame buffer for later display. + * NOTE: dec_ctx == NULL means the rest is invalid (e.g. because no + * codec, no memory, etc.) and we must drop all incoming frames. 
+ * + * Incoming payload is stored in one of the dec_in[] buffers, which are + * emptied by the video thread. These buffers are organized in a circular + * queue, with dec_in_cur being the buffer in use by the incoming stream, + * and dec_in_dpy is the one being displayed. When the pointers need to + * be changed, we synchronize the access to them with dec_in_lock. + * When the list is full dec_in_cur = NULL (we cannot store new data), + * when the list is empty dec_in_dpy is NULL (we cannot display frames). + */ +struct video_in_desc { + struct video_codec_desc *dec; /* decoder */ + AVCodecContext *dec_ctx; /* information about the codec in the stream */ + AVCodec *codec; /* reference to the codec */ + AVFrame *d_frame; /* place to store the decoded frame */ + AVCodecParserContext *parser; + uint16_t next_seq; /* must be 16 bit */ + int discard; /* flag for discard status */ +#define N_DEC_IN 3 /* number of incoming buffers */ + struct fbuf_t *dec_in_cur; /* buffer being filled in */ + struct fbuf_t *dec_in_dpy; /* buffer to display */ + ast_mutex_t dec_in_lock; + struct fbuf_t dec_in[N_DEC_IN]; /* incoming bitstream, allocated/extended in fbuf_append() */ + struct fbuf_t dec_out; /* decoded frame, no buffer (data is in AVFrame) */ + struct fbuf_t rem_dpy; /* display remote image, no buffer (it is in win[WIN_REMOTE].bmp) */ +}; + +/* + * Each codec is defined by a number of callbacks + */ +/*! \brief initialize the encoder */ +typedef int (*encoder_init_f)(struct video_out_desc *v); + +/*! \brief actually call the encoder */ +typedef int (*encoder_encode_f)(struct video_out_desc *v); + +/*! \brief encapsulate the bistream in RTP frames */ +typedef struct ast_frame *(*encoder_encap_f)(struct video_out_desc *out, + struct ast_frame **tail); + +/*! \brief inizialize the decoder */ +typedef int (*decoder_init_f)(struct video_in_desc *v); + +/*! \brief extract the bitstream from RTP frames and store in the fbuf. + * return 0 if ok, 1 on error + */ +typedef int (*decoder_decap_f)(struct fbuf_t *b, uint8_t *data, int len); + +/*! \brief actually call the decoder */ +typedef int (*decoder_decode_f)(struct video_in_desc *v, struct fbuf_t *b); + +struct video_codec_desc { + const char *name; /* format name */ + int format; /* AST_FORMAT_* */ + encoder_init_f enc_init; + encoder_encap_f enc_encap; + encoder_encode_f enc_run; + decoder_init_f dec_init; + decoder_decap_f dec_decap; + decoder_decode_f dec_run; +}; + +/* our representation of a displayed window. SDL can only do one main + * window so we map everything within that one + */ +enum { WIN_LOCAL, WIN_REMOTE, WIN_KEYPAD, WIN_MAX }; + +struct display_window { + SDL_Overlay *bmp; + SDL_Rect rect; /* loc. of images */ +}; + +#define GUI_BUFFER_LEN 256 /* buffer lenght used for input buffers */ + +enum kp_type { KP_NONE, KP_RECT, KP_CIRCLE }; +struct keypad_entry { + int c; /* corresponding character */ + int x0, y0, x1, y1, h; /* arguments */ + enum kp_type type; +}; + +/*! \brief info related to the gui: button status, mouse coords, etc. 
*/ +struct gui_info { + char inbuf[GUI_BUFFER_LEN]; /* buffer for to-dial buffer */ + int inbuf_pos; /* next free position in inbuf */ + char msgbuf[GUI_BUFFER_LEN]; /* buffer for text-message buffer */ + int msgbuf_pos; /* next free position in msgbuf */ + int text_mode; /* switch to-dial and text-message mode */ + int drag_mode; /* switch phone and drag-source mode */ + int x_drag; /* x coordinate where the drag starts */ + int y_drag; /* y coordinate where the drag starts */ +#ifdef HAVE_SDL_TTF + TTF_Font *font; /* font to be used */ +#endif + int outfd; /* fd for output */ + SDL_Surface *keypad; /* the pixmap for the keypad */ + int kp_size, kp_used; + struct keypad_entry *kp; +}; + +/* + * The overall descriptor, with room for config info, video source and + * received data descriptors, SDL info, etc. + */ +struct video_desc { + char codec_name[64]; /* the codec we use */ + + pthread_t vthread; /* video thread */ + int shutdown; /* set to shutdown vthread */ + struct ast_channel *owner; /* owner channel */ + + struct video_in_desc in; /* remote video descriptor */ + struct video_out_desc out; /* local video descriptor */ + + struct gui_info gui; + + /* support for display. */ + int sdl_ok; + int gui_ok; + SDL_Surface *screen; /* the main window */ + char keypad_file[256]; /* image for the keypad */ + char keypad_mask[256]; /* background for the keypad */ + char keypad_font[256]; /* font for the keypad */ + struct display_window win[WIN_MAX]; +}; + +/*! The list of video formats we support. */ +#define CONSOLE_FORMAT_VIDEO ( \ + AST_FORMAT_H263_PLUS | AST_FORMAT_H263 | \ + AST_FORMAT_MP4_VIDEO | \ + AST_FORMAT_H264 | AST_FORMAT_H261) + +static AVPicture *fill_pict(struct fbuf_t *b, AVPicture *p); + +static void fbuf_free(struct fbuf_t *b) +{ + struct fbuf_t x = *b; + + if (b->data && b->size) + ast_free(b->data); + bzero(b, sizeof(*b)); + /* restore some fields */ + b->w = x.w; + b->h = x.h; + b->pix_fmt = x.pix_fmt; +} + +/* + * Append a chunk of data to a buffer taking care of bit alignment + * Return 0 on success, != 0 on failure + */ +static int fbuf_append(struct fbuf_t *b, uint8_t *src, int len, + int sbit, int ebit) +{ + /* + * Allocate buffer. ffmpeg wants an extra FF_INPUT_BUFFER_PADDING_SIZE, + * and also wants 0 as a buffer terminator to prevent trouble. 
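(Editorial aside, not part of the original file: a concrete instance of the bit-alignment case handled further down. Suppose the stored bitstream ends with ebit == 3 padding bits and the next chunk arrives with sbit == 5 padding bits, so ebit + sbit == 8 and the two partial fragments share a single byte:)

	/* illustrative values only */
	uint8_t last   = 0xA8;              /* tail of the buffer: 10101xxx, low 3 bits are padding */
	uint8_t first  = 0x05;              /* head of new chunk:  xxxxx101, high 5 bits are padding */
	uint8_t mask   = (1 << 3) - 1;      /* 0x07, selects the ebit bits to overwrite */
	uint8_t merged = (last & ~mask) | (first & mask);  /* 10101101 == 0xAD */

The remaining bytes of the chunk are then byte-aligned and can simply be appended.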
+ */ + int need = len + FF_INPUT_BUFFER_PADDING_SIZE; + int i; + uint8_t *dst, mask; + + if (b->data == NULL) { + b->size = need; + b->used = 0; + b->ebit = 0; + b->data = ast_calloc(1, b->size); + } else if (b->used + need > b->size) { + b->size = b->used + need; + b->data = ast_realloc(b->data, b->size); + } + if (b->data == NULL) { + ast_log(LOG_WARNING, "alloc failure for %d, discard\n", + b->size); + return 1; + } + if (b->used == 0 && b->ebit != 0) { + ast_log(LOG_WARNING, "ebit not reset at start\n"); + b->ebit = 0; + } + dst = b->data + b->used; + i = b->ebit + sbit; /* bits to ignore around */ + if (i == 0) { /* easy case, just append */ + /* do everything in the common block */ + } else if (i == 8) { /* easy too, just handle the overlap byte */ + mask = (1 << b->ebit) - 1; + /* update the last byte in the buffer */ + dst[-1] &= ~mask; /* clear bits to ignore */ + dst[-1] |= (*src & mask); /* append new bits */ + src += 1; /* skip and prepare for common block */ + len --; + } else { /* must shift the new block, not done yet */ + ast_log(LOG_WARNING, "must handle shift %d %d at %d\n", + b->ebit, sbit, b->used); + return 1; + } + memcpy(dst, src, len); + b->used += len; + b->ebit = ebit; + b->data[b->used] = 0; /* padding */ + return 0; +} + +/*! + * Build an ast_frame for a given chunk of data, and link it into + * the queue, with possibly 'head' bytes at the beginning to + * fill in some fields later. + */ +static struct ast_frame *create_video_frame(uint8_t *start, uint8_t *end, + int format, int head, struct ast_frame *prev) +{ + int len = end-start; + uint8_t *data; + struct ast_frame *f; + + data = ast_calloc(1, len+head); + f = ast_calloc(1, sizeof(*f)); + if (f == NULL || data == NULL) { + ast_log(LOG_WARNING, "--- frame error f %p data %p len %d format %d\n", + f, data, len, format); + if (f) + ast_free(f); + if (data) + ast_free(data); + return NULL; + } + memcpy(data+head, start, len); + f->data = data; + f->mallocd = AST_MALLOCD_DATA | AST_MALLOCD_HDR; + //f->has_timing_info = 1; + //f->ts = ast_tvdiff_ms(ast_tvnow(), out->ts); + f->datalen = len+head; + f->frametype = AST_FRAME_VIDEO; + f->subclass = format; + f->samples = 0; + f->offset = 0; + f->src = "Console"; + f->delivery.tv_sec = 0; + f->delivery.tv_usec = 0; + f->seqno = 0; + AST_LIST_NEXT(f, frame_list) = NULL; + + if (prev) + AST_LIST_NEXT(prev, frame_list) = f; + + return f; +} + +/* some debugging code to check the bitstream: + * declare a bit buffer, initialize it, and fetch data from it. + */ +struct bitbuf { + const uint8_t *base; + int bitsize; /* total size in bits */ + int ofs; /* next bit to read */ +}; + +static struct bitbuf bitbuf_init(const uint8_t *base, int bitsize, int start_ofs) +{ + struct bitbuf a; + a.base = base; + a.bitsize = bitsize; + a.ofs = start_ofs; + return a; +} + +static int bitbuf_left(struct bitbuf *b) +{ + return b->bitsize - b->ofs; +} + +static uint32_t getbits(struct bitbuf *b, int n) +{ + int i, ofs; + const uint8_t *d; + uint8_t mask; + uint32_t retval = 0; + if (n> 31) { + ast_log(LOG_WARNING, "too many bits %d, max 32\n", n); + return 0; + } + if (n + b->ofs > b->bitsize) { + ast_log(LOG_WARNING, "bitbuf overflow %d of %d\n", n + b->ofs, b->bitsize); + n = b->bitsize - b->ofs; + } + ofs = 7 - b->ofs % 8; /* start from msb */ + mask = 1 << ofs; + d = b->base + b->ofs / 8; /* current byte */ + for (i=0 ; i < n; i++) { + retval += retval + (*d & mask ? 
1 : 0); /* shift in new byte */ + b->ofs++; + mask >>= 1; + if (mask == 0) { + d++; + mask = 0x80; + } + } + return retval; +} + +static void check_h261(struct fbuf_t *b) +{ + struct bitbuf a = bitbuf_init(b->data, b->used * 8, 0); + uint32_t x, y; + + x = getbits(&a, 20); /* PSC, 0000 0000 0000 0001 0000 */ + if (x != 0x10) { + ast_log(LOG_WARNING, "bad PSC 0x%x\n", x); + return; + } + x = getbits(&a, 5); /* temporal reference */ + y = getbits(&a, 6); /* ptype */ + if (0) + ast_log(LOG_WARNING, "size %d TR %d PTY spl %d doc %d freeze %d %sCIF hi %d\n", + b->used, + x, + (y & 0x20) ? 1 : 0, + (y & 0x10) ? 1 : 0, + (y & 0x8) ? 1 : 0, + (y & 0x4) ? "" : "Q", + (y & 0x2) ? 1:0); + while ( (x = getbits(&a, 1)) == 1) + ast_log(LOG_WARNING, "PSPARE 0x%x\n", getbits(&a, 8)); + // ast_log(LOG_WARNING, "PSPARE 0 - start GOB LAYER\n"); + while ( (x = bitbuf_left(&a)) > 0) { + // ast_log(LOG_WARNING, "GBSC %d bits left\n", x); + x = getbits(&a, 16); /* GBSC 0000 0000 0000 0001 */ + if (x != 0x1) { + ast_log(LOG_WARNING, "bad GBSC 0x%x\n", x); + break; + } + x = getbits(&a, 4); /* group number */ + y = getbits(&a, 5); /* gquant */ + if (x == 0) { + ast_log(LOG_WARNING, " bad GN %d\n", x); + break; + } + while ( (x = getbits(&a, 1)) == 1) + ast_log(LOG_WARNING, "GSPARE 0x%x\n", getbits(&a, 8)); + while ( (x = bitbuf_left(&a)) > 0) { /* MB layer */ + break; + } + } +} + +void dump_buf(struct fbuf_t *b); +void dump_buf(struct fbuf_t *b) +{ + int i, x, last2lines; + char buf[80]; + + last2lines = (b->used - 16) & ~0xf; + ast_log(LOG_WARNING, "buf size %d of %d\n", b->used, b->size); + for (i = 0; i < b->used; i++) { + x = i & 0xf; + if ( x == 0) { /* new line */ + if (i != 0) + ast_log(LOG_WARNING, "%s\n", buf); + bzero(buf, sizeof(buf)); + sprintf(buf, "%04x: ", i); + } + sprintf(buf + 6 + x*3, "%02x ", b->data[i]); + if (i > 31 && i < last2lines) + i = last2lines - 1; + } + if (buf[0]) + ast_log(LOG_WARNING, "%s\n", buf); +} +/* + * Here starts the glue code for the various supported video codecs. + * For each of them, we need to provide routines for initialization, + * calling the encoder, encapsulating the bitstream in ast_frames, + * extracting payload from ast_frames, and calling the decoder. + */ + +/*--- h263+ support --- */ + +/*! \brief initialization of h263p */ +static int h263p_enc_init(struct video_out_desc *v) +{ + /* modes supported are + - Unrestricted Motion Vector (annex D) + - Advanced Prediction (annex F) + - Advanced Intra Coding (annex I) + - Deblocking Filter (annex J) + - Slice Structure (annex K) + - Alternative Inter VLC (annex S) + - Modified Quantization (annex T) + */ + v->enc_ctx->flags |=CODEC_FLAG_H263P_UMV; /* annex D */ + v->enc_ctx->flags |=CODEC_FLAG_AC_PRED; /* annex f ? */ + v->enc_ctx->flags |=CODEC_FLAG_H263P_SLICE_STRUCT; /* annex k */ + v->enc_ctx->flags |= CODEC_FLAG_H263P_AIC; /* annex I */ + + v->enc_ctx->gop_size = v->fps*5; // emit I frame every 5 seconds + return 0; +} + + +/* + * Create RTP/H.263 fragments to avoid IP fragmentation. We fragment on a + * PSC or a GBSC, but if we don't find a suitable place just break somewhere. + * Everything is byte-aligned. + */ +static struct ast_frame *h263p_encap(struct video_out_desc *out, + struct ast_frame **tail) +{ + struct ast_frame *cur = NULL, *first = NULL; + uint8_t *d = out->enc_out.data; + int len = out->enc_out.used; + int l = len; /* size of the current fragment. 
If 0, must look for a psc */ + + for (;len > 0; len -= l, d += l) { + uint8_t *data; + struct ast_frame *f; + int i, h; + + if (len >= 3 && d[0] == 0 && d[1] == 0 && d[2] >= 0x80) { + /* we are starting a new block, so look for a PSC. */ + for (i = 3; i < len - 3; i++) { + if (d[i] == 0 && d[i+1] == 0 && d[i+2] >= 0x80) { + l = i; + break; + } + } + } + if (l > out->mtu || l > len) { /* psc not found, split */ + l = MIN(len, out->mtu); + } + if (l < 1 || l > out->mtu) { + ast_log(LOG_WARNING, "--- frame error l %d\n", l); + break; + } + + if (d[0] == 0 && d[1] == 0) { /* we start with a psc */ + h = 0; + } else { /* no psc, create a header */ + h = 2; + } + + f = create_video_frame(d, d+l, AST_FORMAT_H263_PLUS, h, cur); + if (!f) + break; + + data = f->data; + if (h == 0) { /* we start with a psc */ + data[0] |= 0x04; // set P == 1, and we are done + } else { /* no psc, create a header */ + data[0] = data[1] = 0; // P == 0 + } + + if (!cur) + first = f; + cur = f; + } + + if (cur) + cur->subclass |= 1; // RTP Marker + + *tail = cur; /* end of the list */ + return first; +} + +/*! \brief extract the bitstreem from the RTP payload. + * This is format dependent. + * For h263+, the format is defined in RFC 2429 + * and basically has a fixed 2-byte header as follows: + * 5 bits RR reserved, shall be 0 + * 1 bit P indicate a start/end condition, + * in which case the payload should be prepended + * by two zero-valued bytes. + * 1 bit V there is an additional VRC header after this header + * 6 bits PLEN length in bytes of extra picture header + * 3 bits PEBIT how many bits to be ignored in the last byte + * + * XXX the code below is not complete. + */ +static int h263p_decap(struct fbuf_t *b, uint8_t *data, int len) +{ + int PLEN; + + if (len < 2) { + ast_log(LOG_WARNING, "invalid framesize %d\n", len); + return 1; + } + PLEN = ( (data[0] & 1) << 5 ) | ( (data[1] & 0xf8) >> 3); + + if (PLEN > 0) { + data += PLEN; + len -= PLEN; + } + if (data[0] & 4) /* bit P */ + data[0] = data[1] = 0; + else { + data += 2; + len -= 2; + } + return fbuf_append(b, data, len, 0, 0); /* ignore trail bits */ +} + + +/* + * generic encoder, used by the various protocols supported here. + * We assume that the buffer is empty at the beginning. + */ +static int ffmpeg_encode(struct video_out_desc *v) +{ + struct fbuf_t *b = &v->enc_out; + int i; + + b->used = avcodec_encode_video(v->enc_ctx, b->data, b->size, v->frame); + i = avcodec_encode_video(v->enc_ctx, b->data + b->used, b->size - b->used, NULL); /* delayed frames ? */ + if (i > 0) { + ast_log(LOG_WARNING, "have %d more bytes\n", i); + b->used += i; + } + return 0; +} + +/* + * Generic decoder, which is used by h263p, h263 and h261 as it simply + * invokes ffmpeg's decoder. + * av_parser_parse should merge a randomly chopped up stream into + * proper frames. After that, if we have a valid frame, we decode it + * until the entire frame is processed. 
+ */ +static int ffmpeg_decode(struct video_in_desc *v, struct fbuf_t *b) +{ + uint8_t *src = b->data; + int srclen = b->used; + int full_frame = 0; + + if (srclen == 0) /* no data */ + return 0; + if (0) + check_h261(b); + // ast_log(LOG_WARNING, "rx size %d\n", srclen); + while (srclen) { + uint8_t *data; + int datalen, ret; + int len = av_parser_parse(v->parser, v->dec_ctx, &data, &datalen, src, srclen, 0, 0); + + src += len; + srclen -= len; + /* The parser might return something it cannot decode, so it skips + * the block returning no data + */ + if (data == NULL || datalen == 0) + continue; + ret = avcodec_decode_video(v->dec_ctx, v->d_frame, &full_frame, data, datalen); + if (full_frame == 1) /* full frame */ + break; + if (ret < 0) { + ast_log(LOG_NOTICE, "Error decoding\n"); + break; + } + } + if (srclen != 0) /* update b with leftover data */ + bcopy(src, b->data, srclen); + b->used = srclen; + b->ebit = 0; + return full_frame; +} + +static struct video_codec_desc h263p_codec = { + .name = "h263p", + .format = AST_FORMAT_H263_PLUS, + .enc_init = h263p_enc_init, + .enc_encap = h263p_encap, + .enc_run = ffmpeg_encode, + .dec_init = NULL, + .dec_decap = h263p_decap, + .dec_run = ffmpeg_decode +}; + +/*--- Plain h263 support --------*/ + +static int h263_enc_init(struct video_out_desc *v) +{ + /* XXX check whether these are supported */ + v->enc_ctx->flags |= CODEC_FLAG_H263P_UMV; + v->enc_ctx->flags |= CODEC_FLAG_H263P_AIC; + v->enc_ctx->flags |= CODEC_FLAG_H263P_SLICE_STRUCT; + v->enc_ctx->flags |= CODEC_FLAG_AC_PRED; + + v->enc_ctx->gop_size = v->fps*5; + + return 0; +} + +/* + * h263 encapsulation is specified in RFC2190. There are three modes + * defined (A, B, C), with 4, 8 and 12 bytes of header, respectively. + * The header is made as follows + * 0.....................|.......................|.............|....31 + * F:1 P:1 SBIT:3 EBIT:3 SRC:3 I:1 U:1 S:1 A:1 R:4 DBQ:2 TRB:3 TR:8 + * FP = 0- mode A, (only one word of header) + * FP = 10 mode B, and also means this is an I or P frame + * FP = 11 mode C, and also means this is a PB frame. + * SBIT, EBIT nuber of bits to ignore at beginning (msbits) and end (lsbits) + * SRC bits 6,7,8 from the h263 PTYPE field + * I = 0 intra-coded, 1 = inter-coded (bit 9 from PTYPE) + * U = 1 for Unrestricted Motion Vector (bit 10 from PTYPE) + * S = 1 for Syntax Based Arith coding (bit 11 from PTYPE) + * A = 1 for Advanced Prediction (bit 12 from PTYPE) + * R = reserved, must be 0 + * DBQ = differential quantization, DBQUANT from h263, 0 unless we are using + * PB frames + * TRB = temporal reference for bframes, also 0 unless this is a PB frame + * TR = temporal reference for P frames, also 0 unless PB frame. + * + * Mode B and mode C description omitted. + * + * An RTP frame can start with a PSC 0000 0000 0000 0000 1000 0 + * or with a GBSC, which also has the first 17 bits as a PSC. + * Note - PSC are byte-aligned, GOB not necessarily. PSC start with + * PSC:22 0000 0000 0000 0000 1000 00 picture start code + * TR:8 .... .... temporal reference + * PTYPE:13 or more ptype... + * If we don't fragment a GOB SBIT and EBIT = 0. + * reference, 8 bit) + * + * The assumption below is that we start with a PSC. 
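(Editorial aside, not part of the original file: the fixed 4-byte Mode A header described above could be unpacked on the receive side as sketched here. The bit positions come straight from the layout listed above; the helper and struct names are hypothetical.)

	struct h263_mode_a {
		int f, p, sbit, ebit, src, i, u, s, a, dbq, trb, tr;
	};

	static void h263_mode_a_unpack(const uint8_t *h, struct h263_mode_a *m)
	{
		m->f    = (h[0] >> 7) & 0x1;    /* F: 0 = Mode A */
		m->p    = (h[0] >> 6) & 0x1;
		m->sbit = (h[0] >> 3) & 0x7;    /* bits to ignore at the start */
		m->ebit =  h[0]       & 0x7;    /* bits to ignore at the end */
		m->src  = (h[1] >> 5) & 0x7;    /* source format, PTYPE bits 6-8 */
		m->i    = (h[1] >> 4) & 0x1;
		m->u    = (h[1] >> 3) & 0x1;
		m->s    = (h[1] >> 2) & 0x1;
		m->a    = (h[1] >> 1) & 0x1;
		/* R (4 reserved bits) spans h[1] bit 0 and h[2] bits 7-5, must be 0 */
		m->dbq  = (h[2] >> 3) & 0x3;
		m->trb  =  h[2]       & 0x7;
		m->tr   =  h[3];
	}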
+ */ +static struct ast_frame *h263_encap(struct video_out_desc *out, + struct ast_frame **tail) +{ + uint8_t *d = out->enc_out.data; + int start = 0, i, len = out->enc_out.used; + struct ast_frame *f, *cur = NULL, *first = NULL; + const int pheader_len = 4; /* Use RFC-2190 Mode A */ + uint8_t h263_hdr[12]; /* worst case, room for a type c header */ + uint8_t *h = h263_hdr; /* shorthand */ + +#define H263_MIN_LEN 6 + if (len < H263_MIN_LEN) /* unreasonably small */ + return NULL; + + bzero(h263_hdr, sizeof(h263_hdr)); + /* Now set the header bytes. Only type A by now, + * and h[0] = h[2] = h[3] = 0 by default. + * PTYPE starts 30 bits in the picture, so the first useful + * bit for us is bit 36 i.e. within d[4] (0 is the msbit). + * SRC = d[4] & 0x1c goes into data[1] & 0xe0 + * I = d[4] & 0x02 goes into data[1] & 0x10 + * U = d[4] & 0x01 goes into data[1] & 0x08 + * S = d[5] & 0x80 goes into data[1] & 0x04 + * A = d[5] & 0x40 goes into data[1] & 0x02 + * R = 0 goes into data[1] & 0x01 + * Optimizing it, we have + */ + h[1] = ( (d[4] & 0x1f) << 3 ) | /* SRC, I, U */ + ( (d[5] & 0xc0) >> 5 ); /* S, A, R */ + + /* now look for the next PSC or GOB header. First try to hit + * a '0' byte then look around for the 0000 0000 0000 0000 1 pattern + * which is both in the PSC and the GBSC. + */ + for (i = H263_MIN_LEN, start = 0; start < len; start = i, i += 3) { + //ast_log(LOG_WARNING, "search at %d of %d/%d\n", i, start, len); + for (; i < len ; i++) { + uint8_t x, rpos, lpos; + int rpos_i; /* index corresponding to rpos */ + if (d[i] != 0) /* cannot be in a GBSC */ + continue; + if (i > len - 1) + break; + x = d[i+1]; + if (x == 0) /* next is equally good */ + continue; + /* see if around us we can make 16 '0' bits for the GBSC. + * Look for the first bit set on the right, and then + * see if we have enough 0 on the left. + * We are guaranteed to end before rpos == 0 + */ + for (rpos = 0x80, rpos_i = 8; rpos; rpos >>= 1, rpos_i--) + if (x & rpos) /* found the '1' bit in GBSC */ + break; + x = d[i-1]; /* now look behind */ + for (lpos = rpos; lpos ; lpos >>= 1) + if (x & lpos) /* too early, not a GBSC */ + break; + if (lpos) /* as i said... */ + continue; + /* now we have a GBSC starting somewhere in d[i-1], + * but it might be not byte-aligned + */ + if (rpos == 0x80) { /* lucky case */ + i = i - 1; + } else { /* XXX to be completed */ + ast_log(LOG_WARNING, "unaligned GBSC 0x%x %d\n", + rpos, rpos_i); + } + break; + } + /* This frame is up to offset i (not inclusive). + * We do not split it yet even if larger than MTU. + */ + f = create_video_frame(d + start, d+i, AST_FORMAT_H263, + pheader_len, cur); + + if (!f) + break; + bcopy(h, f->data, 4); /* copy the h263 header */ + /* XXX to do: if not aligned, fix sbit and ebit, + * then move i back by 1 for the next frame + */ + if (!cur) + first = f; + cur = f; + } + + if (cur) + cur->subclass |= 1; // RTP Marker + + *tail = cur; + return first; +} + +/* XXX We only drop the header here, but maybe we need more. 
*/ +static int h263_decap(struct fbuf_t *b, uint8_t *data, int len) +{ + if (len < 4) { + ast_log(LOG_WARNING, "invalid framesize %d\n", len); + return 1; /* error */ + } + + if ( (data[0] & 0x80) == 0) { + len -= 4; + data += 4; + } else { + ast_log(LOG_WARNING, "unsupported mode 0x%x\n", + data[0]); + return 1; + } + return fbuf_append(b, data, len, 0, 0); /* XXX no bit alignment support yet */ +} + +static struct video_codec_desc h263_codec = { + .name = "h263", + .format = AST_FORMAT_H263, + .enc_init = h263_enc_init, + .enc_encap = h263_encap, + .enc_run = ffmpeg_encode, + .dec_init = NULL, + .dec_decap = h263_decap, + .dec_run = ffmpeg_decode + +}; + +/*---- h261 support -----*/ +static int h261_enc_init(struct video_out_desc *v) +{ + /* It is important to set rtp_payload_size = 0, otherwise + * ffmpeg in h261 mode will produce output that it cannot parse. + * Also try to send I frames more frequently than with other codecs. + */ + v->enc_ctx->rtp_payload_size = 0; /* important - ffmpeg fails otherwise */ + v->enc_ctx->gop_size = v->fps*2; /* be more responsive */ + + return 0; +} + +/* + * The encapsulation of H261 is defined in RFC4587 which obsoletes RFC2032 + * The bitstream is preceded by a 32-bit header word: + * SBIT:3 EBIT:3 I:1 V:1 GOBN:4 MBAP:5 QUANT:5 HMVD:5 VMVD:5 + * SBIT and EBIT are the bits to be ignored at beginning and end, + * I=1 if the stream has only INTRA frames - cannot change during the stream. + * V=0 if motion vector is not used. Cannot change. + * GOBN is the GOB number in effect at the start of packet, 0 if we + * start with a GOB header + * QUANT is the quantizer in effect, 0 if we start with GOB header + * HMVD reference horizontal motion vector. 10000 is forbidden + * VMVD reference vertical motion vector, as above. + * Packetization should occur at GOB boundaries, and if not possible + * with MacroBlock fragmentation. However it is likely that blocks + * are not bit-aligned so we must take care of this. + */ +static struct ast_frame *h261_encap(struct video_out_desc *out, + struct ast_frame **tail) +{ + uint8_t *d = out->enc_out.data; + int start = 0, i, len = out->enc_out.used; + struct ast_frame *f, *cur = NULL, *first = NULL; + const int pheader_len = 4; + uint8_t h261_hdr[4]; + uint8_t *h = h261_hdr; /* shorthand */ + int sbit = 0, ebit = 0; + +#define H261_MIN_LEN 10 + if (len < H261_MIN_LEN) /* unreasonably small */ + return NULL; + + bzero(h261_hdr, sizeof(h261_hdr)); + + /* Similar to the code in h263_encap, but the marker there is longer. + * Start a few bytes within the bitstream to avoid hitting the marker + * twice. Note we might access the buffer at len, but this is ok because + * the caller has it oversized. + */ + for (i = H261_MIN_LEN, start = 0; start < len - 1; start = i, i += 4) { +#if 0 /* test - disable packetization */ + i = len; /* wrong... */ +#else + int found = 0, found_ebit = 0; /* last GBSC position found */ + for (; i < len ; i++) { + uint8_t x, rpos, lpos; + if (d[i] != 0) /* cannot be in a GBSC */ + continue; + x = d[i+1]; + if (x == 0) /* next is equally good */ + continue; + /* See if around us we find 15 '0' bits for the GBSC. + * Look for the first bit set on the right, and then + * see if we have enough 0 on the left. 
+ * We are guaranteed to end before rpos == 0 + */ + for (rpos = 0x80, ebit = 7; rpos; ebit--, rpos >>= 1) + if (x & rpos) /* found the '1' bit in GBSC */ + break; + x = d[i-1]; /* now look behind */ + for (lpos = (rpos >> 1); lpos ; lpos >>= 1) + if (x & lpos) /* too early, not a GBSC */ + break; + if (lpos) /* as i said... */ + continue; + /* now we have a GBSC starting somewhere in d[i-1], + * but it might be not byte-aligned. Just remember it. + */ + if (i - start > out->mtu) /* too large, stop now */ + break; + found_ebit = ebit; + found = i; + i += 4; /* continue forward */ + } + if (i >= len) { /* trim if we went too forward */ + i = len; + ebit = 0; /* hopefully... should ask the bitstream ? */ + } + if (i - start > out->mtu && found) { + /* use the previous GBSC, hope is within the mtu */ + i = found; + ebit = found_ebit; + } +#endif /* test */ + if (i - start < 4) /* XXX too short ? */ + continue; + /* This frame is up to offset i (not inclusive). + * We do not split it yet even if larger than MTU. + */ + f = create_video_frame(d + start, d+i, AST_FORMAT_H261, + pheader_len, cur); + + if (!f) + break; + /* recompute header with I=0, V=1 */ + h[0] = ( (sbit & 7) << 5 ) | ( (ebit & 7) << 2 ) | 1; + bcopy(h, f->data, 4); /* copy the h261 header */ + if (ebit) /* not aligned, restart from previous byte */ + i--; + sbit = (8 - ebit) & 7; + ebit = 0; + if (!cur) + first = f; + cur = f; + } + if (cur) + cur->subclass |= 1; // RTP Marker + + *tail = cur; + return first; +} + +/* + * Pieces might be unaligned so we really need to put them together. + */ +static int h261_decap(struct fbuf_t *b, uint8_t *data, int len) +{ + int ebit, sbit; + + if (len < 8) { + ast_log(LOG_WARNING, "invalid framesize %d\n", len); + return 1; + } + sbit = (data[0] >> 5) & 7; + ebit = (data[0] >> 2) & 7; + len -= 4; + data += 4; + return fbuf_append(b, data, len, sbit, ebit); +} + +static struct video_codec_desc h261_codec = { + .name = "h261", + .format = AST_FORMAT_H261, + .enc_init = h261_enc_init, + .enc_encap = h261_encap, + .enc_run = ffmpeg_encode, + .dec_init = NULL, + .dec_decap = h261_decap, + .dec_run = ffmpeg_decode +}; + +/* mpeg4 support */ +static int mpeg4_enc_init(struct video_out_desc *v) +{ +#if 0 + //v->enc_ctx->flags |= CODEC_FLAG_LOW_DELAY; /*don't use b frames ?*/ + v->enc_ctx->flags |= CODEC_FLAG_AC_PRED; + v->enc_ctx->flags |= CODEC_FLAG_H263P_UMV; + v->enc_ctx->flags |= CODEC_FLAG_QPEL; + v->enc_ctx->flags |= CODEC_FLAG_4MV; + v->enc_ctx->flags |= CODEC_FLAG_GMC; + v->enc_ctx->flags |= CODEC_FLAG_LOOP_FILTER; + v->enc_ctx->flags |= CODEC_FLAG_H263P_SLICE_STRUCT; +#endif + v->enc_ctx->gop_size = v->fps*5; + v->enc_ctx->rtp_payload_size = 0; /* important - ffmpeg fails otherwise */ + return 0; +} + +/* simplistic encapsulation - just split frames in mtu-size units */ +static struct ast_frame *mpeg4_encap(struct video_out_desc *out, + struct ast_frame **tail) +{ + struct ast_frame *f, *cur = NULL, *first = NULL; + uint8_t *d = out->enc_out.data; + uint8_t *end = d+out->enc_out.used; + int len; + + for (;d < end; d += len, cur = f) { + len = MIN(out->mtu, end-d); + f = create_video_frame(d, d+len, AST_FORMAT_MP4_VIDEO, 0, cur); + if (!f) + break; + if (!first) + first = f; + } + if (cur) + cur->subclass |= 1; + *tail = cur; + return first; +} + +static int mpeg4_decap(struct fbuf_t *b, uint8_t *data, int len) +{ + return fbuf_append(b, data, len, 0, 0); +} + +static int mpeg4_decode(struct video_in_desc *v, struct fbuf_t *b) +{ + int full_frame = 0, datalen = b->used; + int ret = 
avcodec_decode_video(v->dec_ctx, v->d_frame, &full_frame, + b->data, datalen); + if (ret < 0) { + ast_log(LOG_NOTICE, "Error decoding\n"); + ret = datalen; /* assume we used everything. */ + } + datalen -= ret; + if (datalen > 0) /* update b with leftover bytes */ + bcopy(b->data + ret, b->data, datalen); + b->used = datalen; + b->ebit = 0; + return full_frame; +} + +static struct video_codec_desc mpeg4_codec = { + .name = "mpeg4", + .format = AST_FORMAT_MP4_VIDEO, + .enc_init = mpeg4_enc_init, + .enc_encap = mpeg4_encap, + .enc_run = ffmpeg_encode, + .dec_init = NULL, + .dec_decap = mpeg4_decap, + .dec_run = mpeg4_decode +}; + +static int h264_enc_init(struct video_out_desc *v) +{ + v->enc_ctx->flags |= CODEC_FLAG_TRUNCATED; + //v->enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER; + //v->enc_ctx->flags2 |= CODEC_FLAG2_FASTPSKIP; + /* TODO: Maybe we need to add some other flags */ + v->enc_ctx->gop_size = v->fps*5; // emit I frame every 5 seconds + v->enc_ctx->rtp_mode = 0; + v->enc_ctx->rtp_payload_size = 0; + v->enc_ctx->bit_rate_tolerance = v->enc_ctx->bit_rate; + return 0; +} + +static int h264_dec_init(struct video_in_desc *v) +{ + v->dec_ctx->flags |= CODEC_FLAG_TRUNCATED; + + return 0; +} + +/* + * The structure of a generic H.264 stream is: + * - 0..n 0-byte(s), unused, optional. one zero-byte is always present + * in the first NAL before the start code prefix. + * - start code prefix (3 bytes): 0x000001 + * (the first bytestream has a + * like these 0x00000001!) + * - NAL header byte ( F[1] | NRI[2] | Type[5] ) where type != 0 + * - byte-stream + * - 0..n 0-byte(s) (padding, unused). + * Segmentation in RTP only needs to be done on start code prefixes. + * If fragments are too long... we don't support it yet. + * - encapsulate (or fragment) the byte-stream (with NAL header included) + */ +static struct ast_frame *h264_encap(struct video_out_desc *out, + struct ast_frame **tail) +{ + struct ast_frame *f = NULL, *cur = NULL, *first = NULL; + uint8_t *d, *start = out->enc_out.data; + uint8_t *end = start + out->enc_out.used; + + /* Search the first start code prefix - ITU-T H.264 sec. B.2, + * and move start right after that, on the NAL header byte. + */ +#define HAVE_NAL(x) (x[-4] == 0 && x[-3] == 0 && x[-2] == 0 && x[-1] == 1) + for (start += 4; start < end; start++) { + int ty = start[0] & 0x1f; + if (HAVE_NAL(start) && ty != 0 && ty != 31) + break; + } + /* if not found, or too short, we just skip the next loop and are done. */ + + /* Here follows the main loop to create frames. Search subsequent start + * codes, and then possibly fragment the unit into smaller fragments. + */ + for (;start < end - 4; start = d) { + int size; /* size of current block */ + uint8_t hdr[2]; /* add-on header when fragmenting */ + int ty = 0; + + /* now search next nal */ + for (d = start + 4; d < end; d++) { + ty = d[0] & 0x1f; + if (HAVE_NAL(d)) + break; /* found NAL */ + } + /* have a block to send. 
d past the start code unless we overflow */ + if (d >= end) { /* NAL not found */ + d = end + 4; + } else if (ty == 0 || ty == 31) { /* found but invalid type, skip */ + ast_log(LOG_WARNING, "skip invalid nal type %d at %d of %d\n", + ty, d - out->enc_out.data, out->enc_out.used); + continue; + } + + size = d - start - 4; /* don't count the end */ + + if (size < out->mtu) { // test - don't fragment + // Single NAL Unit + f = create_video_frame(start, d - 4, AST_FORMAT_H264, 0, cur); + if (!f) + break; + if (!first) + first = f; + + cur = f; + continue; + } + + // Fragmented Unit (Mode A: no DON, very weak) + hdr[0] = (*start & 0xe0) | 28; /* mark as a fragmentation unit */ + hdr[1] = (*start++ & 0x1f) | 0x80 ; /* keep type and set START bit */ + size--; /* skip the NAL header */ + while (size) { + uint8_t *data; + int frag_size = MIN(size, out->mtu); + + f = create_video_frame(start, start+frag_size, AST_FORMAT_H264, 2, cur); + if (!f) + break; + size -= frag_size; /* skip this data block */ + start += frag_size; + + data = f->data; + data[0] = hdr[0]; + data[1] = hdr[1] | (size == 0 ? 0x40 : 0); /* end bit if we are done */ + hdr[1] &= ~0x80; /* clear start bit for subsequent frames */ + if (!first) + first = f; + cur = f; + } + } + + if (cur) + cur->subclass |= 1; // RTP Marker + + *tail = cur; + + return first; +} + +static int h264_decap(struct fbuf_t *b, uint8_t *data, int len) +{ + /* Start Code Prefix (Annex B in specification) */ + uint8_t scp[] = { 0x00, 0x00, 0x00, 0x01 }; + int retval = 0; + int type, ofs = 0; + + if (len < 2) { + ast_log(LOG_WARNING, "--- invalid len %d\n", len); + return 1; + } + /* first of all, check if the packet has F == 0 */ + if (data[0] & 0x80) { + ast_log(LOG_WARNING, "--- forbidden packet; nal: %02x\n", + data[0]); + return 1; + } + + type = data[0] & 0x1f; + switch (type) { + case 0: + case 31: + ast_log(LOG_WARNING, "--- invalid type: %d\n", type); + return 1; + case 24: + case 25: + case 26: + case 27: + case 29: + ast_log(LOG_WARNING, "--- encapsulation not supported : %d\n", type); + return 1; + case 28: /* FU-A Unit */ + if (data[1] & 0x80) { // S == 1, import F and NRI from next + data[1] &= 0x1f; /* preserve type */ + data[1] |= (data[0] & 0xe0); /* import F & NRI */ + retval = fbuf_append(b, scp, sizeof(scp), 0, 0); + ofs = 1; + } else { + ofs = 2; + } + break; + default: /* From 1 to 23 (Single NAL Unit) */ + retval = fbuf_append(b, scp, sizeof(scp), 0, 0); + } + if (!retval) + retval = fbuf_append(b, data + ofs, len - ofs, 0, 0); + if (retval) + ast_log(LOG_WARNING, "result %d\n", retval); + return retval; +} + +static struct video_codec_desc h264_codec = { + .name = "h264", + .format = AST_FORMAT_H264, + .enc_init = h264_enc_init, + .enc_encap = h264_encap, + .enc_run = ffmpeg_encode, + .dec_init = h264_dec_init, + .dec_decap = h264_decap, + .dec_run = ffmpeg_decode +}; + +/*------ end codec specific code -----*/ + + +/* Video4Linux stuff is only used in video_open() */ +#ifdef HAVE_VIDEODEV_H +#include <linux/videodev.h> +#endif + +/*! + * Open the local video source and allocate a buffer + * for storing the image. 
Return 0 on success, -1 on error + */ +static int video_open(struct video_out_desc *v) +{ + struct fbuf_t *b = &v->loc_src; + if (b->data) /* buffer allocated means device already open */ + return v->fd; + v->fd = -1; + /* + * if the device is "X11", then open the x11 grabber + */ + if (!strcasecmp(v->videodevice, "X11")) { + XImage *im; + int screen_num; + + /* init the connection with the X server */ + v->dpy = XOpenDisplay(NULL); + if (v->dpy == NULL) { + ast_log(LOG_WARNING, "error opening display\n"); + goto error; + } + + /* find width and height of the screen */ + screen_num = DefaultScreen(v->dpy); + v->screen_width = DisplayWidth(v->dpy, screen_num); + v->screen_height = DisplayHeight(v->dpy, screen_num); + + v->image = im = XGetImage(v->dpy, + RootWindow(v->dpy, DefaultScreen(v->dpy)), + b->x, b->y, b->w, b->h, AllPlanes, ZPixmap); + if (v->image == NULL) { + ast_log(LOG_WARNING, "error creating Ximage\n"); + goto error; + } + switch (im->bits_per_pixel) { + case 32: + b->pix_fmt = PIX_FMT_RGBA32; + break; + case 16: + b->pix_fmt = (im->green_mask == 0x7e0) ? PIX_FMT_RGB565 : PIX_FMT_RGB555; + break; + } + + ast_log(LOG_NOTICE, "image: data %p %d bpp fmt %d, mask 0x%lx 0x%lx 0x%lx\n", + im->data, + im->bits_per_pixel, + b->pix_fmt, + im->red_mask, im->green_mask, im->blue_mask); + + /* set the pointer but not the size as this is not malloc'ed */ + b->data = (uint8_t *)im->data; + v->fd = -2; + } +#ifdef HAVE_VIDEODEV_H + else { + /* V4L specific */ + struct video_window vw = { 0 }; /* camera attributes */ + struct video_picture vp; + int i; + const char *dev = v->videodevice; + + v->fd = open(dev, O_RDONLY | O_NONBLOCK); + if (v->fd < 0) { + ast_log(LOG_WARNING, "error opening camera %s\n", v->videodevice); + return v->fd; + } + + i = fcntl(v->fd, F_GETFL); + if (-1 == fcntl(v->fd, F_SETFL, i | O_NONBLOCK)) { + /* non fatal, just emit a warning */ + ast_log(LOG_WARNING, "error F_SETFL for %s [%s]\n", + dev, strerror(errno)); + } + /* set format for the camera. + * In principle we could retry with a different format if the + * one we are asking for is not supported. + */ + vw.width = v->loc_src.w; + vw.height = v->loc_src.h; + vw.flags = v->fps << 16; + if (ioctl(v->fd, VIDIOCSWIN, &vw) == -1) { + ast_log(LOG_WARNING, "error setting format for %s [%s]\n", + dev, strerror(errno)); + goto error; + } + if (ioctl(v->fd, VIDIOCGPICT, &vp) == -1) { + ast_log(LOG_WARNING, "error reading picture info\n"); + goto error; + } + ast_log(LOG_WARNING, + "contrast %d bright %d colour %d hue %d white %d palette %d\n", + vp.contrast, vp.brightness, + vp.colour, vp.hue, + vp.whiteness, vp.palette); + /* set the video format. Here again, we don't necessary have to + * fail if the required format is not supported, but try to use + * what the camera gives us. + */ + b->pix_fmt = vp.palette; + vp.palette = VIDEO_PALETTE_YUV420P; + if (ioctl(v->fd, VIDIOCSPICT, &vp) == -1) { + ast_log(LOG_WARNING, "error setting palette, using %d\n", + b->pix_fmt); + } else + b->pix_fmt = vp.palette; + /* allocate the source buffer. 
+ * XXX, the code here only handles yuv411, for other formats + * we need to look at pix_fmt and set size accordingly + */ + b->size = (b->w * b->h * 3)/2; /* yuv411 */ + ast_log(LOG_WARNING, "videodev %s opened, size %dx%d %d\n", + dev, b->w, b->h, b->size); + v->loc_src.data = ast_calloc(1, b->size); + if (!b->data) { + ast_log(LOG_WARNING, "error allocating buffer %d bytes\n", + b->size); + goto error; + } + ast_log(LOG_WARNING, "success opening camera\n"); + } +#endif /* HAVE_VIDEODEV_H */ + + if (v->image == NULL && v->fd < 0) + goto error; + b->used = 0; + return 0; + +error: + ast_log(LOG_WARNING, "fd %d dpy %p img %p data %p\n", + v->fd, v->dpy, v->image, v->loc_src.data); + /* XXX maybe XDestroy (v->image) ? */ + if (v->dpy) + XCloseDisplay(v->dpy); + v->dpy = NULL; + if (v->fd >= 0) + close(v->fd); + v->fd = -1; + fbuf_free(&v->loc_src); + return -1; +} + +/*! \brief complete a buffer from the local video source. + * Called by get_video_frames(), in turn called by the video thread. + */ +static int video_read(struct video_out_desc *v) +{ + struct timeval now = ast_tvnow(); + struct fbuf_t *b = &v->loc_src; + + if (b->data == NULL) /* not initialized */ + return 0; + + /* check if it is time to read */ + if (ast_tvzero(v->last_frame)) + v->last_frame = now; + if (ast_tvdiff_ms(now, v->last_frame) < 1000/v->fps) + return 0; /* too early */ + v->last_frame = now; /* XXX actually, should correct for drift */ + +#ifdef HAVE_X11 + if (v->image) { + /* read frame from X11 */ + AVPicture p; + XGetSubImage(v->dpy, + RootWindow(v->dpy, DefaultScreen(v->dpy)), + b->x, b->y, b->w, b->h, AllPlanes, ZPixmap, v->image, 0, 0); + + b->data = (uint8_t *)v->image->data; + fill_pict(b, &p); + return p.linesize[0] * b->h; + } +#endif + if (v->fd < 0) /* no other source */ + return 0; + for (;;) { + int r, l = v->loc_src.size - v->loc_src.used; + r = read(v->fd, v->loc_src.data + v->loc_src.used, l); + // ast_log(LOG_WARNING, "read %d of %d bytes from webcam\n", r, l); + if (r < 0) /* read error */ + return 0; + if (r == 0) /* no data */ + return 0; + v->loc_src.used += r; + if (r == l) { + v->loc_src.used = 0; /* prepare for next frame */ + return v->loc_src.size; + } + } +} + +/* Helper function to process incoming video. + * For each incoming video call invoke ffmpeg_init() to intialize + * the decoding structure then incoming video frames are processed + * by write_video() which in turn calls pre_process_data(), to extract + * the bitstream; accumulates data into a buffer within video_desc. When + * a frame is complete (determined by the marker bit in the RTP header) + * call decode_video() to decoding and if it successful call show_frame() + * to display the frame. + */ + +/* + * Table of translation between asterisk and ffmpeg formats. + * We need also a field for read and write (encoding and decoding), because + * e.g. H263+ uses different codec IDs in ffmpeg when encoding or decoding. + */ +struct _cm { /* map ffmpeg codec types to asterisk formats */ + uint32_t ast_format; /* 0 is a terminator */ + enum CodecID codec; + enum { CM_RD = 1, CM_WR = 2, CM_RDWR = 3 } rw; /* read or write or both ? */ + struct video_codec_desc *codec_desc; +}; + +static struct _cm video_formats[] = { + { AST_FORMAT_H263_PLUS, CODEC_ID_H263, CM_RD }, /* incoming H263P ? 
*/ + { AST_FORMAT_H263_PLUS, CODEC_ID_H263P, CM_WR }, + { AST_FORMAT_H263, CODEC_ID_H263, CM_RD }, + { AST_FORMAT_H263, CODEC_ID_H263, CM_WR }, + { AST_FORMAT_H261, CODEC_ID_H261, CM_RDWR }, + { AST_FORMAT_H264, CODEC_ID_H264, CM_RDWR }, + { AST_FORMAT_MP4_VIDEO, CODEC_ID_MPEG4, CM_RDWR }, + { 0, 0, 0 }, +}; + + +/*! \brief map an asterisk format into an ffmpeg one */ +static enum CodecID map_video_format(uint32_t ast_format, int rw) +{ + struct _cm *i; + + for (i = video_formats; i->ast_format != 0; i++) + if (ast_format & i->ast_format && rw & i->rw && rw & i->rw) + return i->codec; + return CODEC_ID_NONE; +} + +/* pointers to supported codecs. We assume the first one to be non null. */ +static struct video_codec_desc *supported_codecs[] = { + &h263p_codec, + &h264_codec, + &h263_codec, + &h261_codec, + &mpeg4_codec, + NULL +}; + +/* + * Map the AST_FORMAT to the library. If not recognised, fail. + * This is useful in the input path where we get frames. + */ +static struct video_codec_desc *map_video_codec(int fmt) +{ + int i; + + for (i = 0; supported_codecs[i]; i++) + if (fmt == supported_codecs[i]->format) { + ast_log(LOG_WARNING, "using %s for format 0x%x\n", + supported_codecs[i]->name, fmt); + return supported_codecs[i]; + } + return NULL; +} +; +/* + * Map the codec name to the library. If not recognised, use a default. + * This is useful in the output path where we decide by name, presumably. + */ +static struct video_codec_desc *map_config_video_format(char *name) +{ + int i; + + for (i = 0; supported_codecs[i]; i++) + if (!strcasecmp(name, supported_codecs[i]->name)) + break; + if (supported_codecs[i] == NULL) { + ast_log(LOG_WARNING, "Cannot find codec for '%s'\n", name); + i = 0; + strcpy(name, supported_codecs[i]->name); + } + ast_log(LOG_WARNING, "Using codec '%s'\n", name); + return supported_codecs[i]; +} + +/*! \brief uninitialize the descriptor for remote video stream */ +static int video_in_uninit(struct video_in_desc *v) +{ + int i; + + if (v->parser) { + av_parser_close(v->parser); + v->parser = NULL; + } + if (v->dec_ctx) { + avcodec_close(v->dec_ctx); + av_free(v->dec_ctx); + v->dec_ctx = NULL; + } + if (v->d_frame) { + av_free(v->d_frame); + v->d_frame = NULL; + } + v->codec = NULL; /* only a reference */ + v->dec = NULL; /* forget the decoder */ + v->discard = 1; /* start in discard mode */ + for (i = 0; i < N_DEC_IN; i++) + fbuf_free(&v->dec_in[i]); + fbuf_free(&v->dec_out); + fbuf_free(&v->rem_dpy); + return -1; /* error, in case someone cares */ +} + +/* + * initialize ffmpeg resources used for decoding frames from the network. + */ +static int video_in_init(struct video_in_desc *v, uint32_t format) +{ + enum CodecID codec; + + /* XXX should check that these are already set */ + v->codec = NULL; + v->dec_ctx = NULL; + v->d_frame = NULL; + v->parser = NULL; + v->discard = 1; + + codec = map_video_format(format, CM_RD); + + v->codec = avcodec_find_decoder(codec); + if (!v->codec) { + ast_log(LOG_WARNING, "Unable to find the decoder for format %d\n", codec); + return video_in_uninit(v); + } + /* + * Initialize the codec context. 
+ */ + v->dec_ctx = avcodec_alloc_context(); + if (avcodec_open(v->dec_ctx, v->codec) < 0) { + ast_log(LOG_WARNING, "Cannot open the codec context\n"); + av_free(v->dec_ctx); + v->dec_ctx = NULL; + return video_in_uninit(v); + } + + v->parser = av_parser_init(codec); + if (!v->parser) { + ast_log(LOG_WARNING, "Cannot initialize the decoder parser\n"); + return video_in_uninit(v); + } + + v->d_frame = avcodec_alloc_frame(); + if (!v->d_frame) { + ast_log(LOG_WARNING, "Cannot allocate decoding video frame\n"); + return video_in_uninit(v); + } + return 0; /* ok */ +} + +/*! \brief uninitialize the descriptor for local video stream */ +static int video_out_uninit(struct video_out_desc *v) +{ + if (v->enc_ctx) { + avcodec_close(v->enc_ctx); + av_free(v->enc_ctx); + v->enc_ctx = NULL; + } + if (v->frame) { + av_free(v->frame); + v->frame = NULL; + } + v->codec = NULL; /* only a reference */ + + fbuf_free(&v->loc_src); + fbuf_free(&v->enc_in); + fbuf_free(&v->enc_out); + fbuf_free(&v->loc_dpy); + if (v->image) { /* X11 grabber */ + XCloseDisplay(v->dpy); + v->dpy = NULL; + v->image = NULL; + } + if (v->fd >= 0) { + close(v->fd); + v->fd = -1; + } + return -1; +} + +/* + * Initialize the encoder for the local source: + * - AVCodecContext, AVCodec, AVFrame are used by ffmpeg for encoding; + * - encbuf is used to store the encoded frame (to be sent) + * - mtu is used to determine the max size of video fragment + * NOTE: we enter here with the video source already open. + */ +static int video_out_init(struct video_desc *env) +{ + int codec; + int size; + struct fbuf_t *enc_in; + struct video_out_desc *v = &env->out; + + v->enc_ctx = NULL; + v->codec = NULL; + v->frame = NULL; + v->enc_out.data = NULL; + + if (v->loc_src.data == NULL) { + ast_log(LOG_WARNING, "No local source active\n"); + return video_out_uninit(v); + } + codec = map_video_format(v->enc->format, CM_WR); + v->codec = avcodec_find_encoder(codec); + if (!v->codec) { + ast_log(LOG_WARNING, "Cannot find the encoder for format %d\n", + codec); + return video_out_uninit(v); + } + + v->mtu = 1400; /* set it early so the encoder can use it */ + + /* allocate the input buffer for encoding. + * ffmpeg only supports PIX_FMT_YUV420P for the encoding. + */ + enc_in = &v->enc_in; + enc_in->pix_fmt = PIX_FMT_YUV420P; + enc_in->size = (enc_in->w * enc_in->h * 3)/2; + enc_in->data = ast_calloc(1, enc_in->size); + if (!enc_in->data) { + ast_log(LOG_WARNING, "Cannot allocate encoder input buffer\n"); + return video_out_uninit(v); + } + v->frame = avcodec_alloc_frame(); + if (!v->frame) { + ast_log(LOG_WARNING, "Unable to allocate the encoding video frame\n"); + return video_out_uninit(v); + } + + /* parameters for PIX_FMT_YUV420P */ + size = enc_in->w * enc_in->h; + v->frame->data[0] = enc_in->data; + v->frame->data[1] = v->frame->data[0] + size; + v->frame->data[2] = v->frame->data[1] + size/4; + v->frame->linesize[0] = enc_in->w; + v->frame->linesize[1] = enc_in->w/2; + v->frame->linesize[2] = enc_in->w/2; + + /* now setup the parameters for the encoder */ + v->enc_ctx = avcodec_alloc_context(); + v->enc_ctx->pix_fmt = enc_in->pix_fmt; + v->enc_ctx->width = enc_in->w; + v->enc_ctx->height = enc_in->h; + /* XXX rtp_callback ? + * rtp_mode so ffmpeg inserts as many start codes as possible. 
+ */ + v->enc_ctx->rtp_mode = 1; + v->enc_ctx->rtp_payload_size = v->mtu / 2; // mtu/2 + v->enc_ctx->bit_rate = v->bitrate; + v->enc_ctx->bit_rate_tolerance = v->enc_ctx->bit_rate/2; + v->enc_ctx->qmin = v->qmin; /* should be configured */ + v->enc_ctx->time_base = (AVRational){1, v->fps}; + + v->enc->enc_init(v); + + if (avcodec_open(v->enc_ctx, v->codec) < 0) { + ast_log(LOG_WARNING, "Unable to initialize the encoder %d\n", + codec); + av_free(v->enc_ctx); + v->enc_ctx = NULL; + return video_out_uninit(v); + } + + /* + * Allocate enough for the encoded bitstream. As we are compressing, + * we hope that the output is never larger than the input size. + */ + v->enc_out.data = ast_calloc(1, enc_in->size); + v->enc_out.size = enc_in->size; + v->enc_out.used = 0; + + return 0; +} + +static void cleanup_sdl(struct video_desc *env) +{ + int i; + +#ifdef HAVE_SDL_TTF + /* unload font file */ + if (env->gui.font) { + TTF_CloseFont(env->gui.font); + env->gui.font = NULL; + } + + /* uninitialize SDL_ttf library */ + if ( TTF_WasInit() ) + TTF_Quit(); +#endif + + /* uninitialize the SDL environment */ + for (i = 0; i < WIN_MAX; i++) { + if (env->win[i].bmp) + SDL_FreeYUVOverlay(env->win[i].bmp); + } + if (env->gui.keypad) + SDL_FreeSurface(env->gui.keypad); + env->gui.keypad = NULL; + SDL_Quit(); + env->screen = NULL; /* XXX check reference */ + bzero(env->win, sizeof(env->win)); + if (env->sdl_ok) + ast_mutex_destroy(&(env->in.dec_in_lock)); +} + +/*! \brief uninitialize the entire environment. + * In practice, signal the thread and give it a bit of time to + * complete, giving up if it gets stuck. Because uninit + * is called from hangup with the channel locked, and the thread + * uses the chan lock, we need to unlock here. This is unsafe, + * and we should really use refcounts for the channels. + */ +static void console_video_uninit(struct video_desc *env) +{ + int i, t = 100; /* initial wait is shorter, than make it longer */ + env->shutdown = 1; + for (i=0; env->shutdown && i < 10; i++) { + ast_channel_unlock(env->owner); + usleep(t); + t = 1000000; + ast_channel_lock(env->owner); + } + env->owner = NULL; +} + +/*! fill an AVPicture from our fbuf info, as it is required by + * the image conversion routines in ffmpeg. + * XXX This depends on the format. + */ +static AVPicture *fill_pict(struct fbuf_t *b, AVPicture *p) +{ + /* provide defaults for commonly used formats */ + int l4 = b->w * b->h/4; /* size of U or V frame */ + int len = b->w; /* Y linesize, bytes */ + int luv = b->w/2; /* U/V linesize, bytes */ + + bzero(p, sizeof(*p)); + switch (b->pix_fmt) { + case PIX_FMT_RGB555: + case PIX_FMT_RGB565: + len *= 2; + luv = 0; + break; + case PIX_FMT_RGBA32: + len *= 4; + luv = 0; + break; + case PIX_FMT_YUYV422: /* Packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr */ + len *= 2; /* all data in first plane, probably */ + luv = 0; + break; + } + p->data[0] = b->data; + p->linesize[0] = len; + /* these are only valid for component images */ + p->data[1] = luv ? b->data + 4*l4 : b->data+len; + p->data[2] = luv ? b->data + 5*l4 : b->data+len; + p->linesize[1] = luv; + p->linesize[2] = luv; + return p; +} + +/*! convert/scale between an input and an output format. + * Old version of ffmpeg only have img_convert, which does not rescale. + * New versions use sws_scale which does both. 
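(Editorial usage note, not how this file currently works: on the sws_scale path, my_scale() below sets up and tears down a SwsContext for every frame. A sketch of caching the context and rebuilding it only when geometry or pixel formats change, using the same sws_getContext()/sws_freeContext() calls, might look like this.)

	static struct SwsContext *cached_ctx;
	static int c_in_w, c_in_h, c_in_fmt, c_out_w, c_out_h, c_out_fmt;

	static struct SwsContext *get_scale_ctx(struct fbuf_t *in, struct fbuf_t *out)
	{
		if (cached_ctx == NULL ||
		    c_in_w != in->w || c_in_h != in->h || c_in_fmt != in->pix_fmt ||
		    c_out_w != out->w || c_out_h != out->h || c_out_fmt != out->pix_fmt) {
			if (cached_ctx)
				sws_freeContext(cached_ctx);
			cached_ctx = sws_getContext(in->w, in->h, in->pix_fmt,
				out->w, out->h, out->pix_fmt,
				SWS_BICUBIC, NULL, NULL, NULL);
			c_in_w = in->w; c_in_h = in->h; c_in_fmt = in->pix_fmt;
			c_out_w = out->w; c_out_h = out->h; c_out_fmt = out->pix_fmt;
		}
		return cached_ctx;	/* may still be NULL if sws_getContext() fails */
	}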
+ */ +static void my_scale(struct fbuf_t *in, AVPicture *p_in, + struct fbuf_t *out, AVPicture *p_out) +{ + AVPicture my_p_in, my_p_out; + + if (p_in == NULL) + p_in = fill_pict(in, &my_p_in); + if (p_out == NULL) + p_out = fill_pict(out, &my_p_out); + +#ifdef OLD_FFMPEG + /* XXX img_convert is deprecated, and does not do rescaling */ + img_convert(p_out, out->pix_fmt, + p_in, in->pix_fmt, in->w, in->h); +#else /* XXX replacement */ + { + struct SwsContext *convert_ctx; + + convert_ctx = sws_getContext(in->w, in->h, in->pix_fmt, + out->w, out->h, out->pix_fmt, + SWS_BICUBIC, NULL, NULL, NULL); + if (convert_ctx == NULL) { + ast_log(LOG_ERROR, "FFMPEG::convert_cmodel : swscale context initialization failed"); + return; + } + if (0) + ast_log(LOG_WARNING, "in %d %dx%d out %d %dx%d\n", + in->pix_fmt, in->w, in->h, out->pix_fmt, out->w, out->h); + sws_scale(convert_ctx, + p_in->data, p_in->linesize, + in->w, in->h, /* src slice */ + p_out->data, p_out->linesize); + + sws_freeContext(convert_ctx); + } +#endif /* XXX replacement */ +} + +/* + * Display video frames (from local or remote stream) using the SDL library. + * - Set the video mode to use the resolution specified by the codec context + * - Create a YUV Overlay to copy the frame into it; + * - After the frame is copied into the overlay, display it + * + * The size is taken from the configuration. + * + * 'out' is 0 for remote video, 1 for the local video + */ +static void show_frame(struct video_desc *env, int out) +{ + AVPicture *p_in, p_out; + struct fbuf_t *b_in, *b_out; + SDL_Overlay *bmp; + + if (!env->sdl_ok) + return; + + if (out == WIN_LOCAL) { /* webcam/x11 to sdl */ + b_in = &env->out.enc_in; + b_out = &env->out.loc_dpy; + p_in = NULL; + } else { + /* copy input format from the decoding context */ + AVCodecContext *c = env->in.dec_ctx; + b_in = &env->in.dec_out; + b_in->pix_fmt = c->pix_fmt; + b_in->w = c->width; + b_in->h = c->height; + + b_out = &env->in.rem_dpy; + p_in = (AVPicture *)env->in.d_frame; + } + bmp = env->win[out].bmp; + SDL_LockYUVOverlay(bmp); + /* output picture info - this is sdl, YUV420P */ + bzero(&p_out, sizeof(p_out)); + p_out.data[0] = bmp->pixels[0]; + p_out.data[1] = bmp->pixels[1]; + p_out.data[2] = bmp->pixels[2]; + p_out.linesize[0] = bmp->pitches[0]; + p_out.linesize[1] = bmp->pitches[1]; + p_out.linesize[2] = bmp->pitches[2]; + + my_scale(b_in, p_in, b_out, &p_out); + + /* lock to protect access to Xlib by different threads. */ + SDL_DisplayYUVOverlay(bmp, &env->win[out].rect); + SDL_UnlockYUVOverlay(bmp); +} + +static struct video_desc *get_video_desc(struct ast_channel *c); + +/* + * This function is called (by asterisk) for each video packet + * coming from the network (the 'in' path) that needs to be processed. + * We need to reconstruct the entire video frame before we can decode it. 
+ * After a video packet is received we have to: + * - extract the bitstream with pre_process_data() + * - append the bitstream to a buffer + * - if the fragment is the last (RTP Marker) we decode it with decode_video() + * - after the decoding is completed we display the decoded frame with show_frame() + */ +static int console_write_video(struct ast_channel *chan, struct ast_frame *f) +{ + struct video_desc *env = get_video_desc(chan); + struct video_in_desc *v = &env->in; + + if (v->dec == NULL) { /* try to get the codec */ + v->dec = map_video_codec(f->subclass & ~1); + if (v->dec == NULL) { + ast_log(LOG_WARNING, "cannot find video codec, drop input 0x%x\n", f->subclass); + return 0; + } + if (video_in_init(v, v->dec->format)) { + /* This is not fatal, but we won't have incoming video */ + ast_log(LOG_WARNING, "Cannot initialize input decoder\n"); + v->dec = NULL; + return 0; + } + } + if (v->dec_ctx == NULL) { + ast_log(LOG_WARNING, "cannot decode, dropping frame\n"); + return 0; /* error */ + } + + if (v->dec_in_cur == NULL) /* no buffer for incoming frames, drop */ + return 0; +#if defined(DROP_PACKETS) && DROP_PACKETS > 0 + /* Simulate lost packets */ + if ((random() % 10000) <= 100*DROP_PACKETS) { + ast_log(LOG_NOTICE, "Packet lost [%d]\n", f->seqno); + return 0; + } +#endif + if (v->discard) { + /* + * In discard mode, drop packets until we find one with + * the RTP marker set (which is the end of frame). + * Note that the RTP marker flag is sent as the LSB of the + * subclass, which is a bitmask of formats. The low bit is + * normally used for audio so there is no interference. + */ + if (f->subclass & 0x01) { + v->dec_in_cur->used = 0; + v->dec_in_cur->ebit = 0; + v->next_seq = f->seqno + 1; /* wrap at 16 bit */ + v->discard = 0; + ast_log(LOG_WARNING, "out of discard mode, frame %d\n", f->seqno); + } + return 0; + } + + /* + * Only in-order fragments will be accepted. Remember seqno + * has 16 bit so there is wraparound. Also, ideally we could + * accept a bit of reordering, but at the moment we don't. + */ + if (v->next_seq != f->seqno) { + ast_log(LOG_WARNING, "discarding frame out of order, %d %d\n", + v->next_seq, f->seqno); + v->discard = 1; + return 0; + } + v->next_seq++; + + if (f->data == NULL || f->datalen < 2) { + ast_log(LOG_WARNING, "empty video frame, discard\n"); + return 0; + } + if (v->dec->dec_decap(v->dec_in_cur, f->data, f->datalen)) { + ast_log(LOG_WARNING, "error in dec_decap, enter discard\n"); + v->discard = 1; + } + if (f->subclass & 0x01) { // RTP Marker + /* prepare to decode: advance the buffer so the video thread knows. */ + struct fbuf_t *tmp = v->dec_in_cur; /* store current pointer */ + ast_mutex_lock(&v->dec_in_lock); + if (++v->dec_in_cur == &v->dec_in[N_DEC_IN]) /* advance to next, circular */ + v->dec_in_cur = &v->dec_in[0]; + if (v->dec_in_dpy == NULL) { /* were not displaying anything, so set it */ + v->dec_in_dpy = tmp; + } else if (v->dec_in_dpy == v->dec_in_cur) { /* current slot is busy */ + v->dec_in_cur = NULL; + } + ast_mutex_unlock(&v->dec_in_lock); + } + return 0; +} + + +/*! \brief read a frame from webcam or X11 through video_read(), + * display it, then encode and split it. + * Return a list of ast_frame representing the video fragments. + * The head pointer is returned by the function, the tail pointer + * is returned as an argument. 
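+ * A minimal usage sketch (this mirrors the caller in video_thread() below;
+ * nothing here is new API, just the calling convention):
+ *
+ *	struct ast_frame *tail = NULL, *head;
+ *	head = get_video_frames(env, &tail);
+ *	if (head) {
+ *		// link head..tail into the channel's readq,
+ *		// then write to the alertpipe once per frame
+ *	}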
+ */
+static struct ast_frame *get_video_frames(struct video_desc *env, struct ast_frame **tail)
+{
+	struct video_out_desc *v = &env->out;
+	struct ast_frame *dummy;
+
+	if (!v->loc_src.data) {
+		static volatile int a = 0;
+		if (a++ < 2)
+			ast_log(LOG_WARNING, "fail, no loc_src buffer\n");
+		return NULL;
+	}
+	if (!video_read(v))
+		return NULL;	/* can happen, e.g. we are reading too early */
+
+	if (tail == NULL)
+		tail = &dummy;
+	*tail = NULL;
+	/* Scale the video for the encoder, then use it for local rendering
+	 * so we will see the same as the remote party.
+	 */
+	my_scale(&v->loc_src, NULL, &v->enc_in, NULL);
+	show_frame(env, WIN_LOCAL);
+	if (!v->sendvideo)
+		return NULL;
+	if (v->enc_out.data == NULL) {
+		static volatile int a = 0;
+		if (a++ < 2)
+			ast_log(LOG_WARNING, "fail, no encbuf\n");
+		return NULL;
+	}
+	v->enc->enc_run(v);
+	return v->enc->enc_encap(v, tail);
+}
+
+/*
+ * GUI layout, structure and management
+ *
+
+For the GUI we use SDL to create a large surface (env->screen)
+containing three sections: remote video on the left, local video
+on the right, and the keypad with all controls and text windows
+in the center.
+The central section is built from two images: one is the skin,
+the other is a mask where the sensitive areas of the skin are
+colored in different grayscale levels according to their
+functions. The mapping between colors and functions is defined
+in 'enum pixel_value' below.
+
+Mouse and keyboard events are detected on the whole surface, and
+handled differently according to their location, as follows:
+
+- drags on the local video window move the captured area (when
+  grabbing from X11) or the picture-in-picture location (when a
+  camera picture is included in the X11 grab);
+- clicks on the keypad are mapped to the corresponding key;
+- drags on some keypad areas (sliders etc.) are mapped to the
+  corresponding functions;
+- keystrokes are used as keypad functions, or as text input
+  if we are in text-input mode.
+
+To manage this behavior we use two status variables: one defines
+whether keyboard events go to the dialing functions or to the
+write-message functions, the other whether mouse events operate
+the keypad or drag the capture source.
+
+Configuration options control the appearance of the gui:
+
+    keypad = /tmp/phone.jpg		; the keypad on the screen
+    keypad_mask = /tmp/phone.png	; the grayscale mask
+    keypad_font = /tmp/font.ttf		; the font to use for output
+
+ *
+ */
+
+/* enumeration of pixel values. 0..127 correspond to ASCII chars */
+enum pixel_value {
+	/* answer/close functions */
+	KEY_PICK_UP = 128,
+	KEY_HANG_UP = 129,
+
+	/* other functions */
+	KEY_MUTE = 130,
+	KEY_AUTOANSWER = 131,
+	KEY_SENDVIDEO = 132,
+	KEY_LOCALVIDEO = 133,
+	KEY_REMOTEVIDEO = 134,
+	KEY_WRITEMESSAGE = 135,
+	KEY_GUI_CLOSE = 136,		/* close gui */
+
+	/* other areas within the keypad */
+	KEY_DIGIT_BACKGROUND = 255,
+
+	/* areas outside the keypad - simulated */
+	KEY_OUT_OF_KEYPAD = 251,
+	KEY_REM_DPY = 252,
+	KEY_LOC_DPY = 253,
+};
+
+/*
+ * Handlers for the various keypad functions
+ */
+
+/*! \brief append a character, or reset the buffer if c is '\0' */
+static void append_char(char *str, int *str_pos, const char c)
+{
+	int i = *str_pos;
+	if (c == '\0')
+		i = 0;
+	else if (i < GUI_BUFFER_LEN - 1)
+		str[i++] = c;
+	else
+		i = GUI_BUFFER_LEN - 1; /* unnecessary, I think */
+	str[i] = '\0';
+	*str_pos = i;
+}
+
+/* accumulate digits, possibly call dial if in connected mode */
+static void keypad_digit(struct video_desc *env, int digit)
+{
+	struct chan_oss_pvt *o = find_desc(oss_active);
+
+	if (o->owner) {		/* we have a call, send the digit */
+		struct ast_frame f = { AST_FRAME_DTMF, 0 };
+
+		f.subclass = digit;
+		ast_queue_frame(o->owner, &f);
+	} else {		/* no call, accumulate digits */
+		append_char(env->gui.inbuf, &env->gui.inbuf_pos, digit);
+	}
+}
+
+/* this is a wrapper for actions that are available through the cli */
+/* TODO append arg to command and send the resulting string as cli command */
+static void keypad_send_command(struct video_desc *env, char *command)
+{
+	ast_log(LOG_WARNING, "keypad_send_command(%s) called\n", command);
+	ast_cli_command(env->gui.outfd, command);
+	return;
+}
+
+/* function used to toggle on/off the status of some variables */
+static char *keypad_toggle(int index)
+{
+	struct chan_oss_pvt *o = find_desc(oss_active);
+	ast_log(LOG_WARNING, "keypad_toggle(%i) called\n", index);
+
+	switch (index) {
+	case KEY_MUTE:
+		o->mute = !o->mute;
+		break;
+	case KEY_SENDVIDEO:
+		o->env->out.sendvideo = !o->env->out.sendvideo;
+		break;
+	case KEY_AUTOANSWER:
+		o->autoanswer = !o->autoanswer;
+		break;
+	}
+	return NULL;
+}
+
+/*
+ * Function called when the pick up button is pressed;
+ * it performs actions according to the channel status:
+ *
+ *  - if no one is calling us and no digits were pressed,
+ *    the operation has no effect,
+ *  - if someone is calling us, we answer the call,
+ *  - if we have no call in progress and we pressed some
+ *    digits, send the accumulated digits to the console.
+ */ +static void keypad_pick_up(struct video_desc *env) +{ + struct chan_oss_pvt *o = find_desc(oss_active); + ast_log(LOG_WARNING, "keypad_pick_up called\n"); + + if (o->owner) { /* someone is calling us, just answer */ + struct ast_frame f = { AST_FRAME_CONTROL, AST_CONTROL_ANSWER }; + o->hookstate = 1; + o->cursound = -1; + o->nosound = 0; + ast_queue_frame(o->owner, &f); + } else if (env->gui.inbuf_pos) { /* we have someone to call */ + ast_cli_command(env->gui.outfd, env->gui.inbuf); + } + + append_char(env->gui.inbuf, &env->gui.inbuf_pos, '\0'); /* clear buffer */ +} + +#if 0 /* still unused */ +/* Print given text on the gui */ +static int gui_output(struct video_desc *env, const char *text) +{ +#ifndef HAVE_SDL_TTF + return 1; /* error, not supported */ +#else + int x = 30, y = 20; /* XXX change */ + SDL_Surface *output = NULL; + SDL_Color color = {0, 0, 0}; /* text color */ + SDL_Rect dest = {env->win[WIN_KEYPAD].rect.x + x, y}; + + /* clean surface each rewrite */ + SDL_BlitSurface(env->gui.keypad, NULL, env->screen, &env->win[WIN_KEYPAD].rect); + + output = TTF_RenderText_Solid(env->gui.font, text, color); + if (output == NULL) { + ast_log(LOG_WARNING, "Cannot render text on gui - %s\n", TTF_GetError()); + return 1; + } + + SDL_BlitSurface(output, NULL, env->screen, &dest); + + SDL_UpdateRects(env->gui.keypad, 1, &env->win[WIN_KEYPAD].rect); + SDL_FreeSurface(output); + return 0; /* success */ +#endif +} +#endif + +static int video_geom(struct fbuf_t *b, const char *s); +static void sdl_setup(struct video_desc *env); +static int kp_match_area(const struct keypad_entry *e, int x, int y); + +/* + * Handle SDL_MOUSEBUTTONDOWN type, finding the palette + * index value and calling the right callback. + * + * x, y are referred to the upper left corner of the main SDL window. + */ +static void handle_button_event(struct video_desc *env, SDL_MouseButtonEvent button) +{ + uint8_t index = KEY_OUT_OF_KEYPAD; /* the key or region of the display we clicked on */ + + /* for each click we come back in normal mode */ + env->gui.text_mode = 0; + + /* define keypad boundary */ + if (button.x < env->in.rem_dpy.w) + index = KEY_REM_DPY; /* click on remote video */ + else if (button.x > env->in.rem_dpy.w + env->out.keypad_dpy.w) + index = KEY_LOC_DPY; /* click on local video */ + else if (button.y > env->out.keypad_dpy.h) + index = KEY_OUT_OF_KEYPAD; /* click outside the keypad */ + else if (env->gui.kp) { + int i; + for (i = 0; i < env->gui.kp_used; i++) { + if (kp_match_area(&env->gui.kp[i], button.x - env->in.rem_dpy.w, button.y)) { + index = env->gui.kp[i].c; + break; + } + } + } + + /* exec the function */ + if (index < 128) { /* surely clicked on the keypad, don't care which key */ + keypad_digit(env, index); + return; + } + switch (index) { + /* answer/close function */ + case KEY_PICK_UP: + keypad_pick_up(env); + break; + case KEY_HANG_UP: + keypad_send_command(env, "console hangup"); + break; + + /* other functions */ + case KEY_MUTE: + case KEY_AUTOANSWER: + case KEY_SENDVIDEO: + keypad_toggle(index); + break; + + case KEY_LOCALVIDEO: + break; + case KEY_REMOTEVIDEO: + break; + case KEY_WRITEMESSAGE: + /* goes in text-mode */ + env->gui.text_mode = 1; + break; + + + /* press outside the keypad. 
right increases size, center decreases, left drags */ + case KEY_LOC_DPY: + case KEY_REM_DPY: + if (button.button == SDL_BUTTON_LEFT) { + if (index == KEY_LOC_DPY) { + /* store points where the drag start + * and switch in drag mode */ + env->gui.x_drag = button.x; + env->gui.y_drag = button.y; + env->gui.drag_mode = 1; + } + break; + } else { + char buf[128]; + struct fbuf_t *fb = index == KEY_LOC_DPY ? &env->out.loc_dpy : &env->in.rem_dpy; + sprintf(buf, "%c%dx%d", button.button == SDL_BUTTON_RIGHT ? '>' : '<', + fb->w, fb->h); + video_geom(fb, buf); + sdl_setup(env); + } + break; + case KEY_OUT_OF_KEYPAD: + break; + + case KEY_GUI_CLOSE: + cleanup_sdl(env); + break; + case KEY_DIGIT_BACKGROUND: + break; + default: + ast_log(LOG_WARNING, "function not yet defined %i\n", index); + } +} + +/* + * Handle SDL_KEYDOWN type event, put the key pressed + * in the dial buffer or in the text-message buffer, + * depending on the text_mode variable value. + * + * key is the SDLKey structure corresponding to the key pressed. + */ +static void handle_keyboard_input(struct video_desc *env, SDLKey key) +{ + if (env->gui.text_mode) { + /* append in the text-message buffer */ + if (key == SDLK_RETURN) { + /* send the text message and return in normal mode */ + env->gui.text_mode = 0; + keypad_send_command(env, "send text"); + } else { + /* accumulate the key in the message buffer */ + append_char(env->gui.msgbuf, &env->gui.msgbuf_pos, key); + } + } + else { + /* append in the dial buffer */ + append_char(env->gui.inbuf, &env->gui.inbuf_pos, key); + } + + return; +} + +/* + * Check if the grab point is inside the X screen. + * + * x represent the new grab value + * limit represent the upper value to use + */ +static int boundary_checks(int x, int limit) +{ + return (x <= 0) ? 0 : (x > limit ? limit : x); +} + +/* implement superlinear acceleration on the movement */ +static int move_accel(int delta) +{ + int d1 = delta*delta / 100; + return (delta > 0) ? delta + d1 : delta - d1; +} + +/* + * Move the source of the captured video. + * + * x_final_drag and y_final_drag are the coordinates where the drag ends, + * start coordinares are in the gui_info structure. + */ +static void move_capture_source(struct video_desc *env, int x_final_drag, int y_final_drag) +{ + int new_x, new_y; /* new coordinates for grabbing local video */ + int x = env->out.loc_src.x; /* old value */ + int y = env->out.loc_src.y; /* old value */ + + /* move the origin */ +#define POLARITY -1 /* +1 or -1 depending on the desired direction */ + new_x = x + POLARITY*move_accel(x_final_drag - env->gui.x_drag) * 3; + new_y = y + POLARITY*move_accel(y_final_drag - env->gui.y_drag) * 3; +#undef POLARITY + env->gui.x_drag = x_final_drag; /* update origin */ + env->gui.y_drag = y_final_drag; + + /* check boundary and let the source to grab from the new points */ + env->out.loc_src.x = boundary_checks(new_x, env->out.screen_width - env->out.loc_src.w); + env->out.loc_src.y = boundary_checks(new_y, env->out.screen_height - env->out.loc_src.h); + return; +} + +/* + * I am seeing some kind of deadlock or stall around + * SDL_PumpEvents() while moving the window on a remote X server + * (both xfree-4.4.0 and xorg 7.2) + * and windowmaker. It is unclear what causes it. 
+ */ + +/* grab a bunch of events */ +static void eventhandler(struct video_desc *env) +{ +#define N_EVENTS 32 + int i, n; + SDL_Event ev[N_EVENTS]; + +#define MY_EV (SDL_MOUSEBUTTONDOWN|SDL_KEYDOWN) + while ( (n = SDL_PeepEvents(ev, N_EVENTS, SDL_GETEVENT, SDL_ALLEVENTS)) > 0) { + for (i = 0; i < n; i++) { +#if 0 + ast_log(LOG_WARNING, "------ event %d at %d %d\n", + ev[i].type, ev[i].button.x, ev[i].button.y); +#endif + switch (ev[i].type) { + case SDL_KEYDOWN: + handle_keyboard_input(env, ev[i].key.keysym.sym); + break; + case SDL_MOUSEMOTION: + if (env->gui.drag_mode != 0) + move_capture_source(env, ev[i].motion.x, ev[i].motion.y); + break; + case SDL_MOUSEBUTTONDOWN: + handle_button_event(env, ev[i].button); + break; + case SDL_MOUSEBUTTONUP: + if (env->gui.drag_mode != 0) { + move_capture_source(env, ev[i].button.x, ev[i].button.y); + env->gui.drag_mode = 0; + } + break; + } + + } + } + if (1) { + struct timeval b, a = ast_tvnow(); + int i; + //SDL_Lock_EventThread(); + SDL_PumpEvents(); + b = ast_tvnow(); + i = ast_tvdiff_ms(b, a); + if (i > 3) + fprintf(stderr, "-------- SDL_PumpEvents took %dms\n", i); + //SDL_Unlock_EventThread(); + } +} + +static SDL_Surface *get_keypad(const char *file) +{ + SDL_Surface *temp; + +#ifdef HAVE_SDL_IMAGE + temp = IMG_Load(file); +#else + temp = SDL_LoadBMP(file); +#endif + if (temp == NULL) + fprintf(stderr, "Unable to load image %s: %s\n", + file, SDL_GetError()); + return temp; +} + +/* TODO: consistency checks, check for bpp, widht and height */ +/* Init the mask image used to grab the action. */ +static int gui_init(struct video_desc *env) +{ + /* initialize keypad status */ + env->gui.text_mode = 0; + env->gui.drag_mode = 0; + + /* initialize grab coordinates */ + env->out.loc_src.x = 0; + env->out.loc_src.y = 0; + + /* initialize keyboard buffer */ + append_char(env->gui.inbuf, &env->gui.inbuf_pos, '\0'); + append_char(env->gui.msgbuf, &env->gui.msgbuf_pos, '\0'); + +#ifdef HAVE_SDL_TTF + /* Initialize SDL_ttf library and load font */ + if (TTF_Init() == -1) { + ast_log(LOG_WARNING, "Unable to init SDL_ttf, no output available\n"); + return -1; + } + +#define GUI_FONTSIZE 28 + env->gui.font = TTF_OpenFont( env->keypad_font, GUI_FONTSIZE); + if (!env->gui.font) { + ast_log(LOG_WARNING, "Unable to load font %s, no output available\n", env->keypad_font); + return -1; + } + ast_log(LOG_WARNING, "Loaded font %s\n", env->keypad_font); +#endif + + env->gui.outfd = open ("/dev/null", O_WRONLY); /* discard output, temporary */ + if ( env->gui.outfd < 0 ) { + ast_log(LOG_WARNING, "Unable output fd\n"); + return -1; + } + + return 0; +} + +static void sdl_setup(struct video_desc *env); + +/* + * Helper thread to periodically poll the video source and enqueue the + * generated frames to the channel's queue. + * Using a separate thread also helps because the encoding can be + * computationally expensive so we don't want to starve the main thread. 
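+ * Timing note: each iteration of the loop below sleeps for 50000 us, so the
+ * local source is polled at most 1 / 0.05 s = 20 times per second; a
+ * configured fps above 20 cannot really be reached by this loop.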
+ */ +static void *video_thread(void *arg) +{ + struct video_desc *env = arg; + int count = 0; + + env->screen = NULL; + bzero(env->win, sizeof(env->win)); + + if (SDL_Init(SDL_INIT_VIDEO)) { + ast_log(LOG_WARNING, "Could not initialize SDL - %s\n", + SDL_GetError()); + /* again not fatal, just we won't display anything */ + } else { + sdl_setup(env); + if (env->sdl_ok) + ast_mutex_init(&env->in.dec_in_lock); + /* TODO, segfault if not X display present */ + env->gui_ok = !gui_init(env); + if (!env->gui_ok) + ast_log(LOG_WARNING, "cannot init console gui\n"); + } + if (video_open(&env->out)) { + ast_log(LOG_WARNING, "cannot open local video source\n"); + } else { + /* try to register the fd. Unfortunately, if the webcam + * driver does not support select/poll we are out of luck. + */ + if (env->out.fd >= 0) + ast_channel_set_fd(env->owner, 1, env->out.fd); + video_out_init(env); + } + + for (;;) { + /* XXX 20 times/sec */ + struct timeval t = { 0, 50000 }; + struct ast_frame *p, *f; + struct video_in_desc *v = &env->in; + struct ast_channel *chan = env->owner; + int fd = chan->alertpipe[1]; + + /* determine if video format changed */ + if (count++ % 10 == 0) { + char buf[160]; + if (env->out.sendvideo) + sprintf(buf, "%s %s %dx%d @@ %dfps %dkbps", + env->out.videodevice, env->codec_name, + env->out.enc_in.w, env->out.enc_in.h, + env->out.fps, env->out.bitrate/1000); + else + sprintf(buf, "hold"); + SDL_WM_SetCaption(buf, NULL); + } + + /* manage keypad events */ + /* XXX here we should always check for events, + * otherwise the drag will not work */ + if (env->gui_ok) + eventhandler(env); + + /* sleep for a while */ + ast_select(0, NULL, NULL, NULL, &t); + + SDL_UpdateRects(env->screen, 1, &env->win[WIN_KEYPAD].rect);// XXX inefficient + /* + * While there is something to display, call the decoder and free + * the buffer, possibly enabling the receiver to store new data. + */ + while (v->dec_in_dpy) { + struct fbuf_t *tmp = v->dec_in_dpy; /* store current pointer */ + + if (v->dec->dec_run(v, tmp)) + show_frame(env, WIN_REMOTE); + tmp->used = 0; /* mark buffer as free */ + tmp->ebit = 0; + ast_mutex_lock(&v->dec_in_lock); + if (++v->dec_in_dpy == &v->dec_in[N_DEC_IN]) /* advance to next, circular */ + v->dec_in_dpy = &v->dec_in[0]; + + if (v->dec_in_cur == NULL) /* receiver was idle, enable it... */ + v->dec_in_cur = tmp; /* using the slot just freed */ + else if (v->dec_in_dpy == v->dec_in_cur) /* this was the last slot */ + v->dec_in_dpy = NULL; /* nothing more to display */ + ast_mutex_unlock(&v->dec_in_lock); + } + + + f = get_video_frames(env, &p); /* read and display */ + if (!f) + continue; + if (env->shutdown) + break; + chan = env->owner; + ast_channel_lock(chan); + + /* AST_LIST_INSERT_TAIL is only good for one frame, cannot use here */ + if (chan->readq.first == NULL) { + chan->readq.first = f; + } else { + chan->readq.last->frame_list.next = f; + } + chan->readq.last = p; + /* + * more or less same as ast_queue_frame, but extra + * write on the alertpipe to signal frames. 
+ */ + if (fd > -1) { + int blah = 1, l = sizeof(blah); + for (p = f; p; p = AST_LIST_NEXT(p, frame_list)) { + if (write(fd, &blah, l) != l) + ast_log(LOG_WARNING, "Unable to write to alert pipe on %s, frametype/subclass %d/%d: %s!\n", + chan->name, f->frametype, f->subclass, strerror(errno)); + } + } + ast_channel_unlock(chan); + } + /* thread terminating, here could call the uninit */ + /* uninitialize the local and remote video environments */ + video_in_uninit(&env->in); + video_out_uninit(&env->out); + + if (env->sdl_ok) + cleanup_sdl(env); + + env->shutdown = 0; + return NULL; +} + +static void copy_geometry(struct fbuf_t *src, struct fbuf_t *dst) +{ + if (dst->w == 0) + dst->w = src->w; + if (dst->h == 0) + dst->h = src->h; +} + +/*! initialize the video environment. + * Apart from the formats (constant) used by sdl and the codec, + * we use enc_in as the basic geometry. + */ +static void init_env(struct video_desc *env) +{ + struct fbuf_t *c = &(env->out.loc_src); /* local source */ + struct fbuf_t *ei = &(env->out.enc_in); /* encoder input */ + struct fbuf_t *ld = &(env->out.loc_dpy); /* local display */ + struct fbuf_t *rd = &(env->in.rem_dpy); /* remote display */ + + c->pix_fmt = PIX_FMT_YUV420P; /* default - camera format */ + ei->pix_fmt = PIX_FMT_YUV420P; /* encoder input */ + if (ei->w == 0 || ei->h == 0) { + ei->w = 352; + ei->h = 288; + } + ld->pix_fmt = rd->pix_fmt = PIX_FMT_YUV420P; /* sdl format */ + /* inherit defaults */ + copy_geometry(ei, c); /* camera inherits from encoder input */ + copy_geometry(ei, rd); /* remote display inherits from encoder input */ + copy_geometry(rd, ld); /* local display inherits from remote display */ +} + +/* setup an sdl overlay and associated info, return 0 on success, != 0 on error */ +static int set_win(SDL_Surface *screen, struct display_window *win, int fmt, + int w, int h, int x, int y) +{ + win->bmp = SDL_CreateYUVOverlay(w, h, fmt, screen); + if (win->bmp == NULL) + return -1; /* error */ + win->rect.x = x; + win->rect.y = y; + win->rect.w = w; + win->rect.h = h; + return 0; +} + +/*! + * The first call to the video code, called by oss_new() or similar. + * Here we initialize the various components we use, namely SDL for display, + * ffmpeg for encoding/decoding, and a local video source. + * We do our best to progress even if some of the components are not + * available. + */ +static void console_video_start(struct video_desc *env, + struct ast_channel *owner) +{ + if (env == NULL) /* video not initialized */ + return; + if (owner == NULL) /* nothing to do if we don't have a channel */ + return; + env->owner = owner; + init_env(env); + env->out.enc = map_config_video_format(env->codec_name); + + ast_log(LOG_WARNING, "start video out %s %dx%d\n", + env->codec_name, env->out.enc_in.w, env->out.enc_in.h); + /* + * Register all codecs supported by the ffmpeg library. + * We only need to do it once, but probably doesn't + * harm to do it multiple times. 
+ */ + avcodec_init(); + avcodec_register_all(); + av_log_set_level(AV_LOG_ERROR); /* only report errors */ + + if (env->out.fps == 0) { + env->out.fps = 15; + ast_log(LOG_WARNING, "fps unset, forcing to %d\n", env->out.fps); + } + if (env->out.bitrate == 0) { + env->out.bitrate = 65000; + ast_log(LOG_WARNING, "bitrate unset, forcing to %d\n", env->out.bitrate); + } + + ast_pthread_create_background(&env->vthread, NULL, video_thread, env); +} + +static int keypad_cfg_read(struct gui_info *gui, const char *val); +/* [re]set the main sdl window, useful in case of resize */ +static void sdl_setup(struct video_desc *env) +{ + int dpy_fmt = SDL_IYUV_OVERLAY; /* YV12 causes flicker in SDL */ + int maxw, maxh; + + /* + * initialize the SDL environment. We have one large window + * with local and remote video, and a keypad. + * At the moment we arrange them statically, as follows: + * - on the left, the remote video; + * - on the center, the keypad + * - on the right, the local video + */ + + /* Fetch the keypad now, we need it to know its size */ + if (!env->gui.keypad) + env->gui.keypad = get_keypad(env->keypad_file); + if (env->gui.keypad) { + int fd = -1; + void *p = NULL; + off_t l = 0; + + env->out.keypad_dpy.w = env->gui.keypad->w; + env->out.keypad_dpy.h = env->gui.keypad->h; + /* + * If the keypad image has a comment field, try to read + * the button location from there. The block must be + * keypad_entry = token shape x0 y0 x1 y1 h + * ... + * (basically, lines have the same format as config file entries. + * same as the keypad_entry. + * You can add it to a jpeg file using wrjpgcom + */ + do { /* only once, in fact */ + const unsigned char *s, *e; + + fd = open(env->keypad_file, O_RDONLY); + if (fd < 0) { + ast_log(LOG_WARNING, "fail to open %s\n", env->keypad_file); + break; + } + l = lseek(fd, 0, SEEK_END); + if (l <= 0) { + ast_log(LOG_WARNING, "fail to lseek %s\n", env->keypad_file); + break; + } + p = mmap(NULL, l, PROT_READ, 0, fd, 0); + if (p == NULL) { + ast_log(LOG_WARNING, "fail to mmap %s size %ld\n", env->keypad_file, (long)l); + break; + } + e = (const unsigned char *)p + l; + for (s = p; s < e - 20 ; s++) { + if (!memcmp(s, "keypad_entry", 12)) { /* keyword found */ + ast_log(LOG_WARNING, "found entry\n"); + break; + } + } + for ( ;s < e - 20; s++) { + char buf[256]; + const unsigned char *s1; + if (index(" \t\r\n", *s)) /* ignore blanks */ + continue; + if (*s > 127) /* likely end of comment */ + break; + if (memcmp(s, "keypad_entry", 12)) /* keyword not found */ + break; + s += 12; + l = MIN(sizeof(buf), e - s); + ast_copy_string(buf, s, l); + s1 = ast_skip_blanks(buf); /* between token and '=' */ + if (*s1++ != '=') /* missing separator */ + break; + if (*s1 == '>') /* skip => */ + s1++; + keypad_cfg_read(&env->gui, ast_skip_blanks(s1)); + /* now wait for a newline */ + s1 = s; + while (s1 < e - 20 && !index("\r\n", *s1) && *s1 < 128) + s1++; + s = s1; + } + } while (0); + if (p) + munmap(p, l); + if (fd >= 0) + close(fd); + } +#define BORDER 5 /* border around our windows */ + maxw = env->in.rem_dpy.w + env->out.loc_dpy.w + env->out.keypad_dpy.w; + maxh = MAX( MAX(env->in.rem_dpy.h, env->out.loc_dpy.h), env->out.keypad_dpy.h); + maxw += 4 * BORDER; + maxh += 2 * BORDER; + env->screen = SDL_SetVideoMode(maxw, maxh, 0, 0); + if (!env->screen) { + ast_log(LOG_ERROR, "SDL: could not set video mode - exiting\n"); + goto no_sdl; + } + + SDL_WM_SetCaption("Asterisk console Video Output", NULL); + if (set_win(env->screen, &env->win[WIN_REMOTE], dpy_fmt, + env->in.rem_dpy.w, 
env->in.rem_dpy.h, BORDER, BORDER)) + goto no_sdl; + if (set_win(env->screen, &env->win[WIN_LOCAL], dpy_fmt, + env->out.loc_dpy.w, env->out.loc_dpy.h, + 3*BORDER+env->in.rem_dpy.w + env->out.keypad_dpy.w, BORDER)) + goto no_sdl; + + /* display the skin, but do not free it as we need it later to + * restore text areas and maybe sliders too. + */ + if (env->gui.keypad) { + struct SDL_Rect *dest = &env->win[WIN_KEYPAD].rect; + dest->x = 2*BORDER + env->in.rem_dpy.w; + dest->y = BORDER; + dest->w = env->gui.keypad->w; + dest->h = env->gui.keypad->h; + SDL_BlitSurface(env->gui.keypad, NULL, env->screen, dest); + SDL_UpdateRects(env->screen, 1, dest); + } + env->in.dec_in_cur = &env->in.dec_in[0]; + env->in.dec_in_dpy = NULL; /* nothing to display */ + env->sdl_ok = 1; + +no_sdl: + if (env->sdl_ok == 0) /* free resources in case of errors */ + cleanup_sdl(env); +} + +/* see chan_oss.c for these macros */ +#ifndef M_START +#define _UNDO_M_START +#define M_START(var, val) \ + const char *__s = var; const char *__val = val; +#define M_END(x) x; +#define M_F(tag, f) if (!strcasecmp((__s), tag)) { f; } else +#define M_BOOL(tag, dst) M_F(tag, (dst) = ast_true(__val) ) +#define M_UINT(tag, dst) M_F(tag, (dst) = strtoul(__val, NULL, 0) ) +#define M_STR(tag, dst) M_F(tag, ast_copy_string(dst, __val, sizeof(dst))) +#endif + +/* + * Parse a geometry string, accepting also common names for the formats. + * Trick: if we have a leading > or < and a numeric geometry, + * return the larger or smaller one. + * E.g. <352x288 gives the smaller one, 320x240 + */ +static int video_geom(struct fbuf_t *b, const char *s) +{ + int w = 0, h = 0; + + static struct { + const char *s; int w; int h; + } *fp, formats[] = { + {"vga", 640, 480 }, + {"cif", 352, 288 }, + {"qvga", 320, 240 }, + {"qcif", 176, 144 }, + {"sqcif", 128, 96 }, + {NULL, 0, 0 }, + }; + if (*s == '<' || *s == '>') + sscanf(s+1,"%dx%d", &w, &h); + for (fp = formats; fp->s; fp++) { + if (*s == '>') { /* look for a larger one */ + if (fp->w <= w) { + if (fp > formats) + fp--; /* back one step if possible */ + break; + } + } else if (*s == '<') { /* look for a smaller one */ + if (fp->w < w) + break; + } else if (!strcasecmp(s, fp->s)) { /* look for a string */ + break; + } + } + if (*s == '<' && fp->s == NULL) /* smallest */ + fp--; + if (fp->s) { + b->w = fp->w; + b->h = fp->h; + } else if (sscanf(s, "%dx%d", &b->w, &b->h) != 2) { + ast_log(LOG_WARNING, "Invalid video_size %s, using 352x288\n", s); + b->w = 352; + b->h = 288; + } + return 0; +} + +/* + * Functions to determine if a point is within a region. Return 1 if success. 
+ * First rotate the point, with
+ *	x' = (x - x0) * cos A + (y - y0) * sin A
+ *	y' = -(x - x0) * sin A + (y - y0) * cos A
+ * where cos A = (x1-x0)/l, sin A = (y1-y0)/l, and
+ *	l = sqrt( (x1-x0)^2 + (y1-y0)^2 )
+ * Then determine inclusion with simple comparisons on the rotated point:
+ *	rectangle:	x' >= 0 && x' < l && y' >= 0 && y' < h
+ *	ellipse:	(x'/l)^2 + (y'/h)^2 < 1
+ */
+static int kp_match_area(const struct keypad_entry *e, int x, int y)
+{
+	double xp, dx = (e->x1 - e->x0);
+	double yp, dy = (e->y1 - e->y0);
+	double l = sqrt(dx*dx + dy*dy);
+	int ret = 0;
+
+	if (l > 1) { /* large enough */
+		xp = ((x - e->x0)*dx + (y - e->y0)*dy)/l;
+		yp = (-(x - e->x0)*dy + (y - e->y0)*dx)/l;
+		if (e->type == KP_RECT) {
+			ret = (xp >= 0 && xp < l && yp >= 0 && yp < e->h);
+		} else if (e->type == KP_CIRCLE) {
+			dx = xp*xp/(l*l) + yp*yp/(e->h*e->h);
+			ret = (dx < 1);
+		}
+	}
+#if 0
+	ast_log(LOG_WARNING, "result %d [%d] for match %d,%d in type %d p0 %d,%d p1 %d,%d h %d\n",
+		ret, e->c, x, y, e->type, e->x0, e->y0, e->x1, e->y1, e->h);
+#endif
+	return ret;
+}
+
+/*
+ * read a keypad entry line in the format
+ *	reset
+ *	token circle xc yc diameter
+ *	token circle x0 y0 x1 y1 h	# ellipse, main diameter and height
+ *	token rect x0 y0 x1 y1 h	# rectangle with main side and height
+ * token is the token to be returned, either a character or a symbol
+ * as KEY_* above
+ */
+struct _s_k { const char *s; int k; };
+static struct _s_k gui_key_map[] = {
+	{"PICK_UP",	KEY_PICK_UP },
+	{"PICKUP",	KEY_PICK_UP },
+	{"HANG_UP",	KEY_HANG_UP },
+	{"HANGUP",	KEY_HANG_UP },
+	{"MUTE",	KEY_MUTE },
+	{"AUTOANSWER",	KEY_AUTOANSWER },
+	{"SENDVIDEO",	KEY_SENDVIDEO },
+	{"LOCALVIDEO",	KEY_LOCALVIDEO },
+	{"REMOTEVIDEO",	KEY_REMOTEVIDEO },
+	{"WRITEMESSAGE", KEY_WRITEMESSAGE },
+	{"GUI_CLOSE",	KEY_GUI_CLOSE },
+	{NULL, 0 } };
+
+static int keypad_cfg_read(struct gui_info *gui, const char *val)
+{
+	struct keypad_entry e;
+	char s1[16], s2[16];
+	int i, ret = 0;
+
+	bzero(&e, sizeof(e));
+	i = sscanf(val, "%14s %14s %d %d %d %d %d",
+		s1, s2, &e.x0, &e.y0, &e.x1, &e.y1, &e.h);
+
+	switch (i) {
+	default:
+		break;
+	case 1:	/* only "reset" is allowed */
+		if (strcasecmp(s1, "reset"))	/* invalid */
+			break;
+		if (gui->kp) {
+			gui->kp_used = 0;
+		}
+		ret = 1;
+		break;
+	case 5: /* token circle xc yc diameter */
+		if (strcasecmp(s2, "circle"))	/* invalid */
+			break;
+		e.h = e.x1;
+		e.y1 = e.y0;	/* map radius in x1 y1 */
+		e.x1 = e.x0 + e.h;	/* map radius in x1 y1 */
+		e.x0 = e.x0 - e.h;	/* map radius in x1 y1 */
+		/* fallthrough */
+
+	case 7: /* token circle|rect x0 y0 x1 y1 h */
+		if (e.x1 < e.x0 || e.h <= 0) {
+			ast_log(LOG_WARNING, "error in coordinates\n");
+			e.type = 0;
+			break;
+		}
+		if (!strcasecmp(s2, "circle")) {
+			/* for a circle we specify the diameter but store center and radius */
+			e.type = KP_CIRCLE;
+			e.x0 = (e.x1 + e.x0) / 2;
+			e.y0 = (e.y1 + e.y0) / 2;
+			e.h = e.h / 2;
+		} else if (!strcasecmp(s2, "rect")) {
+			e.type = KP_RECT;
+		} else
+			break;
+		ret = 1;
+	}
+	// ast_log(LOG_WARNING, "reading [%s] returns %d %d\n", val, i, ret);
+	if (ret == 0)
+		return 0;
+	/* map the string into token to be returned */
+	i = atoi(s1);
+	if (i > 0 || s1[1] == '\0')	/* numbers or single characters */
+		e.c = (i > 9) ? 
i : s1[0]; + else { + struct _s_k *p; + for (p = gui_key_map; p->s; p++) { + if (!strcasecmp(p->s, s1)) { + e.c = p->k; + break; + } + } + } + if (e.c == 0) { + ast_log(LOG_WARNING, "missing token\n"); + return 0; + } + if (gui->kp_size == 0) { + gui->kp = ast_calloc(10, sizeof(e)); + if (gui->kp == NULL) { + ast_log(LOG_WARNING, "cannot allocate kp"); + return 0; + } + gui->kp_size = 10; + } + if (gui->kp_size == gui->kp_used) { /* must allocate */ + struct keypad_entry *a = ast_realloc(gui->kp, sizeof(e)*(gui->kp_size+10)); + if (a == NULL) { + ast_log(LOG_WARNING, "cannot reallocate kp"); + return 0; + } + gui->kp = a; + gui->kp_size += 10; + } + if (gui->kp_size == gui->kp_used) + return 0; + ast_log(LOG_WARNING, "allocated entry %d\n", gui->kp_used); + gui->kp[gui->kp_used++] = e; + return 1; +} + +/* list of commands supported by the cli. + * For write operation we use the commands in console_video_config(), + * for reads we use console_video_cli(). XXX Names should be fixed. + */ +#define CONSOLE_VIDEO_CMDS \ + "console {videodevice|videocodec|sendvideo" \ + "|video_size|bitrate|fps|qmin" \ + "|keypad|keypad_mask|keypad_entry" \ + "}" + +/* extend ast_cli with video commands. Called by console_video_config */ +static int console_video_cli(struct video_desc *env, const char *var, int fd) +{ + if (env == NULL) + return 0; /* unrecognised */ + + if (!strcasecmp(var, "videodevice")) { + ast_cli(fd, "videodevice is [%s]\n", env->out.videodevice); + } else if (!strcasecmp(var, "videocodec")) { + ast_cli(fd, "videocodec is [%s]\n", env->codec_name); + } else if (!strcasecmp(var, "sendvideo")) { + ast_cli(fd, "sendvideo is [%s]\n", env->out.sendvideo ? "on" : "off"); + } else if (!strcasecmp(var, "video_size")) { + ast_cli(fd, "sizes: video %dx%d camera %dx%d local %dx%d remote %dx%d in %dx%d\n", + env->out.enc_in.w, env->out.enc_in.h, + env->out.loc_src.w, env->out.loc_src.h, + env->out.loc_dpy.w, env->out.loc_src.h, + env->in.rem_dpy.w, env->in.rem_dpy.h, + env->in.dec_out.w, env->in.dec_out.h); + } else if (!strcasecmp(var, "bitrate")) { + ast_cli(fd, "bitrate is [%d]\n", env->out.bitrate); + } else if (!strcasecmp(var, "qmin")) { + ast_cli(fd, "qmin is [%d]\n", env->out.qmin); + } else if (!strcasecmp(var, "fps")) { + ast_cli(fd, "fps is [%d]\n", env->out.fps); + } else { + return 0; /* unrecognised */ + } + return 1; /* recognised */ +} + +/*! parse config command for video support. 
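+ *
+ * For reference, a hypothetical configuration fragment using the keys
+ * recognized below (values are examples only, except where they match the
+ * defaults assigned in this function):
+ *
+ *	videodevice = X11		; the default set below
+ *	videocodec = h263
+ *	video_size = cif		; 352x288, see video_geom()
+ *	camera_size = qcif
+ *	fps = 5				; default set below
+ *	bitrate = 65000			; default set below
+ *	qmin = 3			; default set below
+ *	keypad = /tmp/phone.jpg
+ *	keypad_font = /tmp/font.ttf
+ *	keypad_entry = PICK_UP circle 100 100 40	; see keypad_cfg_read()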
*/ +static int console_video_config(struct video_desc **penv, + const char *var, const char *val) +{ + struct video_desc *env; + M_START(var, val); + + if (penv == NULL) { + ast_log(LOG_WARNING, "bad argument penv=NULL\n"); + return 1; /* error */ + } + /* allocate the video descriptor first time we get here */ + env = *penv; + if (env == NULL) { + env = *penv = ast_calloc(1, sizeof(struct video_desc)); + if (env == NULL) { + ast_log(LOG_WARNING, "fail to allocate video_desc\n"); + return 1; /* error */ + + } + /* set default values */ + ast_copy_string(env->out.videodevice, "X11", sizeof(env->out.videodevice)); + env->out.fps = 5; + env->out.bitrate = 65000; + env->out.sendvideo = 1; + env->out.qmin = 3; + } + M_STR("videodevice", env->out.videodevice) + M_BOOL("sendvideo", env->out.sendvideo) + M_F("video_size", video_geom(&env->out.enc_in, val)) + M_F("camera_size", video_geom(&env->out.loc_src, val)) + M_F("local_size", video_geom(&env->out.loc_dpy, val)) + M_F("remote_size", video_geom(&env->in.rem_dpy, val)) + M_STR("keypad", env->keypad_file) + M_F("keypad_entry", keypad_cfg_read(&env->gui, val)) + M_STR("keypad_mask", env->keypad_mask) + M_STR("keypad_font", env->keypad_font) + M_UINT("fps", env->out.fps) + M_UINT("bitrate", env->out.bitrate) + M_UINT("qmin", env->out.qmin) + M_STR("videocodec", env->codec_name) + M_END(return 1;) /* the 'nothing found' case */ + return 0; /* found something */ +} +#ifdef _UNDO_M_START +#undef M_START +#undef M_END +#undef M_F +#undef M_BOOL +#undef M_UINT +#undef M_STR +#undef _UNDO_M_START +#endif + +#endif /* video support */ |