/* * copacin.c * * Copyright (c) Chris Putnam 2004-2014 * * Program and source code released under the GPL version 2 * */ #include #include #include #include "is_ws.h" #include "newstr.h" #include "newstr_conv.h" #include "list.h" #include "name.h" #include "title.h" #include "fields.h" #include "reftypes.h" #include "serialno.h" #include "copacin.h" /***************************************************** PUBLIC: void copacin_initparams() *****************************************************/ void copacin_initparams( param *p, const char *progname ) { p->readformat = BIBL_COPACIN; p->charsetin = BIBL_CHARSET_DEFAULT; p->charsetin_src = BIBL_SRC_DEFAULT; p->latexin = 0; p->xmlin = 0; p->utf8in = 0; p->nosplittitle = 0; p->verbose = 0; p->addcount = 0; p->output_raw = 0; p->readf = copacin_readf; p->processf = copacin_processf; p->cleanf = NULL; p->typef = NULL; p->convertf = copacin_convertf; p->all = copac_all; p->nall = copac_nall; list_init( &(p->asis) ); list_init( &(p->corps) ); if ( !progname ) p->progname = NULL; else p->progname = strdup( progname ); } /***************************************************** PUBLIC: int copacin_readf() *****************************************************/ /* Endnote-Refer/Copac tag definition: character 1 = alphabetic character character 2 = alphabetic character character 3 = dash character 4 = space */ static int copacin_istag( char *buf ) { if (! ((buf[0]>='A' && buf[0]<='Z')) || (buf[0]>='a' && buf[0]<='z') ) return 0; if (! ((buf[1]>='A' && buf[1]<='Z')) || (buf[1]>='a' && buf[1]<='z') ) return 0; if (buf[2]!='-' ) return 0; if (buf[3]!=' ' ) return 0; return 1; } static int readmore( FILE *fp, char *buf, int bufsize, int *bufpos, newstr *line ) { if ( line->len ) return 1; else return newstr_fget( fp, buf, bufsize, bufpos, line ); } int copacin_readf( FILE *fp, char *buf, int bufsize, int *bufpos, newstr *line, newstr *reference, int *fcharset ) { int haveref = 0, inref=0; char *p; *fcharset = CHARSET_UNKNOWN; while ( !haveref && readmore( fp, buf, bufsize, bufpos, line ) ) { /* blank line separates */ if ( line->data==NULL ) continue; if ( inref && line->len==0 ) haveref=1; p = &(line->data[0]); /* Recognize UTF8 BOM */ if ( line->len > 2 && (unsigned char)(p[0])==0xEF && (unsigned char)(p[1])==0xBB && (unsigned char)(p[2])==0xBF ) { *fcharset = CHARSET_UNICODE; p += 3; } if ( copacin_istag( p ) ) { if ( inref ) newstr_addchar( reference, '\n' ); newstr_strcat( reference, p ); newstr_empty( line ); inref = 1; } else if ( inref ) { if ( p ) { /* copac puts tag only on 1st line */ newstr_addchar( reference, ' ' ); if ( *p ) p++; if ( *p ) p++; if ( *p ) p++; newstr_strcat( reference, p ); } newstr_empty( line ); } else { newstr_empty( line ); } } return haveref; } /***************************************************** PUBLIC: int copacin_processf() *****************************************************/ static char* copacin_addtag2( char *p, newstr *tag, newstr *data ) { int i; i =0; while ( i<3 && *p ) { newstr_addchar( tag, *p++ ); i++; } while ( *p==' ' || *p=='\t' ) p++; while ( *p && *p!='\r' && *p!='\n' ) { newstr_addchar( data, *p ); p++; } newstr_trimendingws( data ); while ( *p=='\n' || *p=='\r' ) p++; return p; } static char * copacin_nextline( char *p ) { while ( *p && *p!='\n' && *p!='\r') p++; while ( *p=='\n' || *p=='\r' ) p++; return p; } int copacin_processf( fields *copacin, char *p, char *filename, long nref ) { newstr tag, data; int status; newstr_init( &tag ); newstr_init( &data ); while ( *p ) { p = skip_ws( p ); if ( copacin_istag( p ) ) { p = copacin_addtag2( p, &tag, &data ); /* don't add empty strings */ if ( tag.len && data.len ) { status = fields_add( copacin, tag.data, data.data, 0 ); if ( status!=FIELDS_OK ) return 0; } newstr_empty( &tag ); newstr_empty( &data ); } else p = copacin_nextline( p ); } newstr_free( &tag ); newstr_free( &data ); return 1; } /***************************************************** PUBLIC: int copacin_convertf(), returns BIBL_OK or BIBL_ERR_MEMERR *****************************************************/ /* copac names appear to always start with last name first, but don't * always seem to have a comma after the name * * editors seem to be stuck in as authors with the tag "[Editor]" in it */ static int copacin_addname( fields *info, char *tag, newstr *name, int level, list *asis, list *corps ) { char *usetag = tag, editor[]="EDITOR"; newstr usename, *s; list tokens; int comma = 0, i, ok; if ( list_find( asis, name->data ) !=-1 || list_find( corps, name->data ) !=-1 ) { ok = name_add( info, tag, name->data, level, asis, corps ); if ( ok ) return BIBL_OK; else return BIBL_ERR_MEMERR; } list_init( &tokens ); newstr_init( &usename ); list_tokenize( &tokens, name, " ", 1 ); for ( i=0; idata, "[Editor]" ) ) { usetag = editor; newstr_strcpy( s, "" ); } else if ( s->len && s->data[s->len-1]==',' ) { comma++; } } if ( comma==0 && tokens.n ) { s = list_get( &tokens, 0 ); newstr_addchar( s, ',' ); } for ( i=0; iverbose ) { if ( p->progname ) fprintf( stderr, "%s: ", p->progname ); fprintf( stderr, "Cannot find tag '%s'\n", tag ); } } static int copacin_simple( fields *out, char *tag, char *value, int level ) { int fstatus = fields_add( out, tag, value, level ); if ( fstatus==FIELDS_OK ) return BIBL_OK; else return BIBL_ERR_MEMERR; } int copacin_convertf( fields *copacin, fields *out, int reftype, param *p, variants *all, int nall ) { int process, level, i, n, nfields, ok, status = BIBL_OK; newstr *tag, *data; char *newtag; nfields = fields_num( copacin ); for ( i=0; idata, reftype, all, nall, &process, &level, &newtag ); if ( n==-1 ) { copacin_report_notag( p, tag->data ); continue; } if ( process == ALWAYS ) continue; /*add these later*/ data = fields_value( copacin, i, FIELDS_STRP ); switch ( process ) { case SIMPLE: status = copacin_simple( out, newtag, data->data, level ); break; case TITLE: ok = title_process( out, newtag, data->data, level, p->nosplittitle ); if ( ok ) status = BIBL_OK; else status = BIBL_ERR_MEMERR; break; case PERSON: status = copacin_addname( out, newtag, data, level, &(p->asis), &(p->corps) ); break; case SERIALNO: ok = addsn( out, data->data, level ); if ( ok ) status = BIBL_OK; else status = BIBL_ERR_MEMERR; break; default: fprintf(stderr,"%s: internal error -- " "illegal process value %d\n", p->progname, process ); status = BIBL_OK; break; } if ( status!=BIBL_OK ) return status; } return status; }