source: trunk/poppler/mypoppler/poppler/ABWOutputDev.cc @ 253

Last change on this file since 253 was 253, checked in by Eugene Romanenko, 13 years ago

PDF plugin: Poppler library updated to version 0.8.4

File size: 45.5 KB
Line 
1//========================================================================
2//
3// ABWOutputDev.cc
4//
5// Jauco Noordzij
6//
7// Based somewhat on HtmlOutputDev.cc
8//
9//========================================================================
10
11#ifdef __GNUC__
12#pragma implementation
13#endif
14
15#include "config.h"
16#include <stdio.h>
17#include <stdlib.h>
18#include <stdarg.h>
19#include <stddef.h>
20#include <ctype.h>
21#include <math.h>
22#include "goo/GooString.h"
23#include "goo/GooList.h"
24#include "UnicodeMap.h"
25#include "goo/gmem.h"
26#include "Error.h"
27#include "GfxState.h"
28#include "GlobalParams.h"
29#include "ABWOutputDev.h"
30#include "PDFDoc.h"
31
32#include <libxml/parser.h>
33#include <libxml/tree.h>
34#include <libxml/xpath.h>
35#include <libxml/xpathInternals.h>
36
37
38// Inter-character space width which will cause addChar to start a new
39// word.
40#define minWordBreakSpace 0.1
41
42// Maximum inter-word spacing, as a fraction of the font size.
43#define maxWordSpacing 1.5
44
45// Max distance between baselines of two lines within a block, as a
46// fraction of the font size.
47#define maxLineSpacingDelta 1.5
48
49#define C_maxVCutValue 4
50#define C_maxHCutValue 5
51//------------------------------------------------------------------------
52// ABWOutputDev
53//------------------------------------------------------------------------
54
55ABWOutputDev::ABWOutputDev(xmlDocPtr ext_doc)
56{
57  pdfdoc = NULL;
58  N_page = N_style = N_text = N_styleset = N_Block = N_word = NULL;
59  doc = ext_doc;
60  N_root = xmlNewNode(NULL, BAD_CAST "abiword");
61  xmlDocSetRootElement(doc, N_root);
62  N_styleset = xmlNewChild(N_root, NULL, BAD_CAST "styles", NULL);
63  N_content = xmlNewChild(N_root, NULL, BAD_CAST "content", NULL);
64  uMap = globalParams->getTextEncoding();
65  maxStyle = Style = 1;
66}
67
68ABWOutputDev::~ABWOutputDev() {
69  xmlCleanupParser();
70}
71
72void ABWOutputDev::startPage(int pageNum, GfxState *state) {
73  /*While reading a pdf page this node acts as a placeholder parent.
74  when conversion is finished and the page is structured as we like it
75  all text fragments are moved from N_page to N_content.*/
76  N_page = xmlNewNode(NULL, BAD_CAST "page");
77  G_pageNum = pageNum;
78} 
79
80/*Callback to denote that poppler reached the end of a page
81here I insert most of the interesting processing stuff*/
82void ABWOutputDev::endPage() {
83  //make sure all words are closed
84  endTextBlock();
85  cleanUpNode(N_page, true);
86  //xmlAddChild(N_content, N_page);
87  //xmlSaveFormatFileEnc("pre-cut.xml", doc, "UTF-8", 1);
88  //xmlUnlinkNode(N_page);
89  //call the top down cutting mechanism
90  recursiveXYC(N_page);
91  //by stopping to worry about creating empty nodes I made the code quite a
92  //bit more robust. This function makes sure we have a nice'n'clean tree
93  cleanUpNode(N_page, true);
94  //xmlAddChild(N_content, N_page);
95  //xmlSaveFormatFileEnc("raw.xml", doc, "UTF-8", 1);
96  //xmlUnlinkNode(N_page);
97 
98  //Interpret the XY tree and infer text blocks and columns
99  interpretXYTree();
100  cleanUpNode(N_page, true);
101  //xmlAddChild(N_content, N_page);
102  //xmlSaveFormatFileEnc("interpreted.xml", doc, "UTF-8", 1);
103  //xmlUnlinkNode(N_page);
104 
105  //I have blocks and columns, this function will turn that into paragraphs and
106  //columns
107  generateParagraphs();
108  cleanUpNode(N_page, true);
109  xmlAddChild(N_content, N_page);
110  N_page = NULL;
111}
112
113void ABWOutputDev::recursiveXYC(xmlNodePtr nodeset) {
114  /*This function implements the recursive XY Cut. basically, it gets
115  the largest piece of whitespace (using getBiggestSeperator()) and then
116  splits the page using splitNodes on that whitespace. It calls itself again
117  with both the halves*/
118  float bhs, bvs, X1, X2, Y1, Y2;
119
120  bvs = getBiggestSeperator(nodeset, VERTICAL, &X1, &X2);
121  bhs = getBiggestSeperator(nodeset, HORIZONTAL, &Y1, &Y2);
122 
123  if (bvs == -1){
124    if (bhs == -1){//both -1
125      //FIXME: add assertions that bvs and bhs are >=-1
126      printf("No seperators\n");
127      return;
128    }
129    else { //only bhs > -1
130      splitNodes(Y1, HORIZONTAL, nodeset, bhs);
131    }
132  }
133  else {
134    if (bhs == -1){//only bvs > -1
135      splitNodes(X1, VERTICAL, nodeset, bvs);
136    }
137    else {//both > -1
138      if (bvs >= (bhs/1.7)){
139        //When people read a text they prefer vertical cuts over horizontal
140        //ones. I'm not that sure about the 1.7 value, but it seems to work.
141        splitNodes(X1, VERTICAL, nodeset, bvs);
142      }
143      else {
144        splitNodes(Y1, HORIZONTAL, nodeset, bhs);
145      }
146    }
147  }
148  recursiveXYC(nodeset->children);
149  recursiveXYC(nodeset->children->next);
150}
151
152void ABWOutputDev::splitNodes(float splitValue, unsigned int direction, xmlNodePtr N_parent, double seperator){
153  //This function takes a nodeset and splits it based on a cut value. It returns
154  //the nodePtr with two childnodes, the both chunks.
155  xmlNodePtr N_move, N_cur, N_newH, N_newL;
156  char * propName;
157  const char *nodeName;
158  char buf[20];
159  if (direction == HORIZONTAL) {
160    propName = "Y1"; 
161    nodeName = "horizontal";
162  }
163  else { 
164    propName = "X1"; 
165    nodeName = "vertical";
166  }
167  N_newH = xmlNewNode(NULL, BAD_CAST nodeName);
168  N_newL = xmlNewNode(NULL, BAD_CAST nodeName);
169  sprintf(buf, "%f", seperator); 
170  xmlNewProp(N_newH, BAD_CAST "diff", BAD_CAST buf);
171  sprintf(buf, "%f", seperator); 
172  xmlNewProp(N_newL, BAD_CAST "diff", BAD_CAST buf);
173  N_cur = N_parent->children;
174  while (N_cur){
175    N_move = N_cur->next;
176    xmlUnlinkNode(N_cur);
177    if (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST propName)) > splitValue){
178      xmlAddChild(N_newH, N_cur);
179    }
180    else {
181      xmlAddChild(N_newL, N_cur);
182    }
183    N_cur = N_move;
184  }
185  xmlAddChild(N_parent, N_newL);
186  xmlAddChild(N_parent, N_newH);
187}
188
189float ABWOutputDev::getBiggestSeperator(xmlNodePtr N_set, unsigned int direction, float * C1, float * C2)
190{
191  int i = 0;
192  int nodeCount = xmlLsCountNode(N_set);
193  float store;
194  int min;
195  float gap, endV;
196  float * stt;
197  float * end;
198  if (nodeCount == 0){
199    //Add assertion that this shouldn't happen
200    fprintf(stderr,"No child nodes");
201    return -1;
202  }
203  stt = new float[nodeCount];
204  end = new float[nodeCount];
205  //store all variables in two arrays (one for start, one for end coordinates)
206  if (direction == VERTICAL) {
207    for (xmlNodePtr N_cur = N_set->children; N_cur != NULL; N_cur = N_cur->next){
208      stt[i] = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X1"));
209      end[i] = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X2"));
210      i++;
211    }
212  }
213  else {
214    for (xmlNodePtr N_cur = N_set->children; N_cur != NULL; N_cur = N_cur->next){
215      stt[i] = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y1"));
216      end[i] = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y2"));
217      i++;
218    }
219  }
220  //Sort them
221  for (i = 0; i < nodeCount - 1; i++){
222    min = i;
223    for (int j = i + 1; j < nodeCount; j++)
224      if (stt[j] < stt[i])
225        min = j;
226    store = stt[i];
227    stt[i] = stt[min];
228    stt[min] = store;
229    store = end[i];
230    end[i] = end[min];
231    end[min] = store;
232  }
233  //find the largest gap
234  gap = -1;
235  endV = end[0];
236  *C1 = 0;
237  *C2 = 0;
238  for (int inspect = 1; inspect < nodeCount; inspect++){
239    //no gap
240    if (((stt[inspect] - endV) - gap) < 0.5){ //FIXME:This is copied almost directly from the previous function, needs checking out
241      //partial overlap instead of complete one
242      if (end[inspect] > endV)
243        endV = end[inspect];
244    }
245    //gap
246    else{
247      //gap is larger than any previous gap
248      if (gap < (stt[inspect] - endV)){
249        gap = stt[inspect] - endV;
250        *C1 = endV;
251        *C2 = stt[inspect];
252      }
253      endV = end[inspect];
254    }
255  }
256  delete[] stt;
257  delete[] end;
258  return gap;
259}
260
261void ABWOutputDev::updateFont(GfxState *state) {
262  char buf[160];
263  xmlNodePtr N_cur;
264  GfxFont *font;
265  bool found = false;
266  bool isBold, isItalic, S_isBold, S_isItalic;
267  isBold = isItalic = S_isBold =  S_isItalic = false;
268  font = state->getFont();
269  GooString *ftName;
270  char *fnEnd, *fnName;
271  int fnStart, ftSize;
272  //the first time this function is called there is no funt.
273  //Fixme: find out if that isn'y a bug
274  if (font){
275    isBold = (font->isBold() || font->getWeight() >6 || (strstr(font->getOrigName()->getCString(), "Bold")-font->getOrigName()->getCString() == (font->getOrigName()->getLength()-4)));
276    isItalic =  (font->isItalic() || (strstr(font->getOrigName()->getCString(), "Italic")-font->getOrigName()->getCString() == (font->getOrigName()->getLength()-6)));
277    ftSize = int(state->getTransformedFontSize())-1;
278    ftName = new GooString(font->getOrigName());
279    fnStart = strcspn(ftName->getCString(), "+");
280    if (fnStart < ftName->getLength())
281      ftName->del(0,fnStart+1);
282    fnEnd = strrchr(ftName->getCString(), 44);
283    if (fnEnd == 0)
284      fnEnd = strrchr(ftName->getCString(), 45);
285    if (fnEnd != 0)
286      ftName->del(fnEnd-ftName->getCString(),ftName->getLength()-1);
287   
288/*    fnName = ftName;
289    if (isBold or isItalic){
290      fnStart = strcspn(fnName, "+");
291      if (fnStart == font->getOrigName()->getLength())
292        fnStart = 0;
293      else fnStart++;
294
295      fnEnd = strstr(fnName, ",");
296      if (fnEnd == 0)
297        fnEnd = strstr(fnName, "-");
298      if (fnEnd != 0)
299        fnName[fnEnd-fnName] = 0;
300//      char fntName[fnLength];
301//      strncpy (fntName,fnName+fnStart+1,fnLength);
302      fnName+=fnStart;
303//      fnName = fntName;
304    }
305    else {*/
306      fnName = ftName->getCString();
307//    }
308    for (N_cur = N_styleset->children; N_cur; N_cur = N_cur ->next){
309      if (
310       isBold == (xmlStrcasecmp(xmlGetProp(N_cur,BAD_CAST "bold"),BAD_CAST "bold;") == 0)
311       &&
312       isItalic == (xmlStrcasecmp(xmlGetProp(N_cur,BAD_CAST "italic"),BAD_CAST "italic") == 0)
313       &&
314       xmlStrcasecmp(xmlGetProp(N_cur,BAD_CAST "font"),BAD_CAST fnName) == 0
315       &&
316       xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "size")) == ftSize
317      ) {
318        found = true;
319        Style = int(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "id")));
320      }
321    }
322    if (!found){
323      N_cur = xmlNewChild(N_styleset, NULL, BAD_CAST "s", NULL);
324      xmlSetProp(N_cur, BAD_CAST "type", BAD_CAST "P");
325      sprintf(buf, "%d", maxStyle++);
326      xmlSetProp(N_cur, BAD_CAST "name", BAD_CAST buf);
327      xmlSetProp(N_cur, BAD_CAST "id", BAD_CAST buf);
328      Style = maxStyle;
329      sprintf(buf, "%d", ftSize); xmlSetProp(N_cur, BAD_CAST "size", BAD_CAST buf);
330      isBold   ? xmlSetProp(N_cur, BAD_CAST "bold", BAD_CAST "bold;")  : xmlSetProp(N_cur, BAD_CAST "bold", BAD_CAST "normal;");
331      isItalic ? xmlSetProp(N_cur, BAD_CAST "italic", BAD_CAST "italic"): xmlSetProp(N_cur, BAD_CAST "italic", BAD_CAST "normal");
332      xmlSetProp(N_cur, BAD_CAST "font", BAD_CAST fnName);
333    }
334  }
335}
336
337void ABWOutputDev::drawChar(GfxState *state, double x, double y,
338                        double dx, double dy,
339                        double originX, double originY,
340                        CharCode code, int nBytes, Unicode *u, int uLen)
341{
342  //I wouldn't know what size this should safely be. I guess 64 bytes should be
343  //enough for any unicode character
344  char buf[64];
345  int charLen;
346  x = dx;
347  y = dy;
348  //state->textTransformDelta(dx * state->getHorizScaling(), dy, &dx, &dy);
349  //state->transformDelta(dx, dy, &dx, &dy);
350  if (uLen == 1 && code == 0x20) {
351    //If we break a text sequence on space, then the X1 should be increased
352    //but the Y1 and Y2 should remain the same.
353    beginWord(state,X2+dx,Y2);
354  }
355  else {
356    X2    += dx;
357    Y2    += dy;
358    charLen = uMap->mapUnicode(*u,buf,sizeof(buf));
359    //Getting Unicode to libxml is something I need to fix.
360    //simply passing it using a bad-cast isn't working.
361    //I assume that CharCode code it the U+value of the unicode character
362    //But for a ligature code gives me DF which is the ringel-s, I guess
363    //code should be two bytes wide?
364    xmlNodeAddContentLen(N_word, BAD_CAST buf, charLen);
365  }
366}
367
368void ABWOutputDev::beginString(GfxState *state, GooString *s) {
369  double x,y;
370  //state->textTransform(x, y, &x, &y);
371  state->transform(state->getCurX(), state->getCurY(), &x, &y);
372  if (N_word) {
373    verDist = y-Y2;
374    horDist = x-X2;
375    //TEST:changed fabs(horDist) to horDist
376    //FIXME: this if statement seems awkward to me.
377    if (horDist > (state->getTransformedFontSize()*maxWordSpacing) || (fabs(verDist) > (state->getTransformedFontSize()/maxLineSpacingDelta))) {
378      beginTextBlock(state,x,y);
379    }
380    else {
381      if ((horDist > (state->getTransformedFontSize()*minWordBreakSpace)) || (fabs(verDist) > (state->getTransformedFontSize()/maxLineSpacingDelta))) {
382        beginWord(state,x,y);
383      }
384    }
385  }
386  else {
387  //This is the first word. Clear all values and call beginWord;
388    X2 = x;
389    Y2 = y;
390    horDist = 0;
391    verDist = 0;
392    height  = 0;
393    beginTextBlock(state,x,y);
394  }
395}
396
397void ABWOutputDev::endString(GfxState *state) {
398
399}
400
401void ABWOutputDev::beginWord(GfxState *state, double x, double y){
402  char buf[20];
403//  printf("***BREAK!***\n");
404  endWord();
405  X1 = x;
406  Y2 = y;
407
408  horDist = X1-X2;
409  verDist = Y1-Y2;
410
411  X2 = X1;
412  height = state->getFont()->getAscent() * state->getTransformedFontSize();
413  Y1 = Y2-height;
414
415  N_word = xmlNewChild(N_Block, NULL, BAD_CAST "word", NULL);
416  sprintf(buf, "%f", X1); xmlNewProp(N_word, BAD_CAST "X1", BAD_CAST buf);
417  sprintf(buf, "%f", Y1); xmlNewProp(N_word, BAD_CAST "Y1", BAD_CAST buf);
418  sprintf(buf, "%d", Style); xmlNewProp(N_word, BAD_CAST "style", BAD_CAST buf);
419}
420
421void ABWOutputDev::endWord(){
422  char buf[20];
423  if (N_word) {
424    sprintf(buf, "%f", X2);    xmlNewProp(N_word, BAD_CAST "X2", BAD_CAST buf);
425    sprintf(buf, "%f", Y2);    xmlNewProp(N_word, BAD_CAST "Y2", BAD_CAST buf);
426    sprintf(buf, "%f", X2-X1); xmlNewProp(N_word, BAD_CAST "width", BAD_CAST buf);
427    sprintf(buf, "%f", Y2-Y1); xmlNewProp(N_word, BAD_CAST "height", BAD_CAST buf);
428    N_word = NULL;
429  }
430}
431
432void ABWOutputDev::beginTextBlock(GfxState *state, double x, double y){
433  endTextBlock();
434  N_Block = xmlNewChild(N_page, NULL, BAD_CAST "Textblock", NULL);
435  beginWord(state,x,y);
436}
437
438void ABWOutputDev::endTextBlock(){
439  if (N_Block) {
440    endWord();
441    N_Block = NULL; 
442  }
443}
444/*
445This will be a function to retrieve coherent text blocks from the chunk tree.*/
446void ABWOutputDev::interpretXYTree(){
447  xmlNodePtr N_oldPage;
448  N_oldPage = N_page;
449  N_page = xmlNewNode(NULL, BAD_CAST "page");
450  N_column = N_page;
451  //xmlAddChild(N_content, N_page);
452  N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
453  ATP_recursive(N_oldPage);
454}
455
456void ABWOutputDev::ATP_recursive(xmlNodePtr N_parent){
457  xmlNodePtr N_first, N_second, N_line, N_tempCol, N_tempColset;
458
459  N_first  = N_parent->children;
460  if (!N_first)
461    return;
462
463  N_second = N_first->next;
464/*
465  Possibilities:
466  there is one child node
467    Because we cleaned up before the only case where we allow one childnode is
468    within Textblocks and textBlocks within 'vertical' nodes.
469      basically one text node means: add it to the current block.
470  There are two childnodes
471    This can be two verticals, two horizontals or one horizontal and a text node.
472    verticals:
473      If the first is vertical, the second is as well.
474      verticals mean: create a new Block, add a column per vertical make the
475      vertical the block and recurse inside.
476      then make the second vertical the block and recurse inside
477      then finish the block (ie. create a new one)
478    horizontal and or Textblocks
479        if first is textnode
480          add first to block
481          if second is textnode
482            at to block
483          else
484            call again
485        else
486          begin new block
487            call again
488          begin new block
489          if second is text node
490            add to block
491          else
492            call again
493  there are more then two child nodes
494    this can be a number of Textblocks and horizontals
495    add the textNodes to the current Block
496    if a horizontal is encountered enter it and generate a new block afterwards
497  */
498  //fprintf(stderr,"**********************************************************************\n");
499  //xmlSaveFormatFileEnc("-", doc, "UTF-8", 1);
500  switch (xmlLsCountNode(N_parent)) {
501  case 1:
502    //fprintf(stderr,"case 1\n");
503    N_line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
504    xmlUnlinkNode(N_first);
505    xmlAddChild(N_line, N_first);
506    break;
507  case 2:
508    //fprintf(stderr,"case 2\n");
509    if (xmlStrcasecmp(N_first->name,BAD_CAST "vertical") == 0){
510      //store the column for the moment
511      N_tempCol = N_column;
512      /*If we have three columns they will turn up in the tree as:
513      <vertical>
514        <vertical/>
515        <vertical/>
516      </vertical>
517      <vertical/>
518      */
519      //if the parent is a vertical as well, we can skip the colset generation
520      //thing here we can also remove the just added column and block, because
521      //these are going to replace them
522      if (xmlStrcasecmp(N_parent->name,BAD_CAST "vertical") != 0){
523        //fprintf(stderr,"first time column\n");
524        N_tempColset = N_colset;
525        N_colset = xmlNewChild(N_column, NULL, BAD_CAST "colset", NULL);
526        N_column = xmlNewChild(N_colset, NULL, BAD_CAST "column", NULL);
527        N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
528      }
529      else {
530        //fprintf(stderr,"second time column\n");
531        xmlUnlinkNode(N_column);
532        N_column = xmlNewChild(N_colset, NULL, BAD_CAST "column", NULL);
533        N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
534      }
535      //fprintf(stderr,"Building first column...\n");
536      ATP_recursive(N_first);
537      N_column = xmlNewChild(N_colset, NULL, BAD_CAST "column", NULL);
538      N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
539      //fprintf(stderr,"Building second column...\n");
540      ATP_recursive(N_second);
541      //make sure we end the column by continuing in the master column and
542      //setting the block and line to it
543      N_column = N_tempCol;
544      if (xmlStrcasecmp(N_parent->name,BAD_CAST "vertical") != 0){
545        if (N_tempColset != NULL)
546          N_colset = N_tempColset;
547        else
548          fprintf(stderr,"N_templColset should not! be empty (line 823)");//FIXME: add assert
549      }
550    }
551    else {
552      if (xmlStrcasecmp(N_first->name,BAD_CAST "Textblock") == 0) {
553        //fprintf(stderr,"add first as textblock\n");
554        N_line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
555        xmlUnlinkNode(N_first);
556        xmlAddChild(N_line, N_first);
557        if (xmlStrcasecmp(N_second->name,BAD_CAST "Textblock") == 0) {
558          //fprintf(stderr,"add second as textblock\n");
559          //FIXME: this is not neat. We should ignore the cut ignoring when there are only two elements above
560          //line aggregation doesn't work anyway atm.
561          xmlUnlinkNode(N_second);
562          xmlAddChild(N_line, N_second);
563          //We have two textChunks that are going to be added to the line.
564          //the following statements make the line wrap around both textblocks
565          //if the firstX1 is smaller then the second X1 use the first, else use the second etc.
566        }
567        else {
568          //fprintf(stderr,"recursing into second\n");
569          ATP_recursive(N_second);
570        }
571      }
572      else {
573        N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
574        //fprintf(stderr,"recursing into first\n");
575        ATP_recursive(N_first);
576        N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
577        if (xmlStrcasecmp(N_second->name,BAD_CAST "Textblock") == 0) {
578          //fprintf(stderr,"add second as textblock\n");
579          N_line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
580          xmlUnlinkNode(N_second);
581          xmlAddChild(N_line, N_second);
582        }
583        else {
584          //fprintf(stderr,"recursing into second\n");
585          ATP_recursive(N_second);
586        }
587      }
588    }
589    break;
590  default:
591    //double tX1=0, tX2=0, tY1=0, tY2=0;
592    //fprintf(stderr,"case default\n");
593    N_line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
594    while (N_first){
595      //xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "X1")) < tX1 ? tX1 = xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "X1")) : tX1 = tX1;
596      //xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "X2")) > tX2 ? tX2 = xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "X2")) : tX2 = tX2;
597      //xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "Y1")) < tY1 ? tY1 = xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "Y1")) : tY1 = tY1;
598      //xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "Y2")) > tY2 ? tY2 = xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "Y2")) : tY1 = tY2;
599      N_second = N_first->next;
600      if (xmlStrcasecmp(N_first->name,BAD_CAST "Textblock") == 0){
601        xmlUnlinkNode(N_first);
602        xmlAddChild(N_line, N_first);
603      }
604      else { //fprintf(stderr,"This shouldn't happen! (line 700)\n");
605      }
606      N_first = N_second;
607    }
608    break;
609  }
610}
611
612/*The cleanup function. It started out as a simple function to remove empty nodes
613so that I could call xmladdnewchildnode as often as I liked so that I wouldn't get seg-faults
614It is now a bit more advanced, makes sure the tree is as it's supposed to be and adds information too*/
615void ABWOutputDev::cleanUpNode(xmlNodePtr N_parent, bool aggregateInfo){
616  double tX1=-1, tX2=-1, tY1=-1, tY2=-1;
617  xmlNodePtr N_cur, N_next;
618  N_cur = N_parent->children;
619  char buf[20];
620  int prevStyle = -1;
621  xmlChar *val;
622  int styleLength = xmlLsCountNode(N_styleset)+1;
623  float stylePos;
624  int *styles = new int[styleLength];
625  for (int i=1; i< styleLength; i++) { styles[i] = 0;}
626  /*
627  ignore two horizontal nodes with textBlocks right underneath them. They
628  signal the end of a chunk, and the horizontal seperation needs to be
629  preserved, because it means they are different lines. The second horizontal
630  therefore needs to be kept.
631  */
632  if ((xmlLsCountNode(N_parent) == 2)
633      &&
634     xmlStrcasecmp(N_parent->name,BAD_CAST "horizontal") == 0
635      && 
636     N_cur
637      &&
638     N_cur->next
639      &&
640     xmlStrcasecmp(N_cur->name,BAD_CAST "horizontal") == 0 && xmlStrcasecmp(N_cur->next->name,BAD_CAST "horizontal") == 0
641      &&
642     xmlLsCountNode(N_cur) == 1 && xmlLsCountNode(N_cur->next) == 1
643      &&
644     xmlStrcasecmp(N_cur->children->name,BAD_CAST "Textblock") == 0 && xmlStrcasecmp(N_cur->next->children->name,BAD_CAST "Textblock") == 0
645     ) {
646    xmlAddPrevSibling(N_cur->next,N_cur->children); 
647    xmlUnlinkNode(N_cur);
648  } 
649  /*
650  This removes columns if one of the parts is actually a single letter.
651  I found out I liked the columns better, so I have the code commented out.
652  */
653/*  else if ((xmlLsCountNode(N_parent) == 2)
654             &&
655            N_cur
656             &&
657            N_cur->next
658             &&
659            xmlStrcasecmp(N_cur->name,BAD_CAST "vertical") == 0
660             &&
661            xmlStrcasecmp(N_cur->next->name,BAD_CAST "vertical") == 0
662             &&
663            (N_cur->children)
664             &&
665            (N_cur->children->children)
666             &&
667            (N_cur->children->children->children)
668             &&
669            xmlStrlen(N_cur->children->children->children->content) == 1) {
670    N_next = N_cur->next;
671    xmlAddChild(N_parent, N_next->children);
672    xmlAddPrevSibling(N_next->children->children, N_cur->children);
673    xmlUnlinkNode(N_cur);
674    xmlUnlinkNode(N_next);
675  } */else {
676    while (N_cur){
677      N_next = N_cur->next;
678      cleanUpNode(N_cur, aggregateInfo);
679      if (xmlLsCountNode(N_cur) == 0 && (xmlStrcasecmp(N_cur->name,BAD_CAST "cbr") != 0) && (xmlStrcasecmp(N_cur->name,BAD_CAST "s") != 0))
680        xmlUnlinkNode(N_cur);
681      //If the node is still around
682      N_cur = N_next;
683    }
684  }
685  //If a countainer element has only one child, it can be removed except for vertical
686  //cuts with only one textElement;
687  //the main reason for this code is to remove the crumbs after cleaning up in the loop above
688  if ((xmlLsCountNode(N_parent) == 1) && ((xmlStrcasecmp(N_parent->name,BAD_CAST "horizontal") == 0) || ((xmlStrcasecmp(N_parent->name,BAD_CAST "vertical") == 0) && (xmlStrcasecmp(N_parent->children->name,BAD_CAST "Textblock") != 0)))){
689    N_cur = N_parent->children;
690    xmlAddPrevSibling(N_parent,N_cur);
691    xmlUnlinkNode(N_parent);
692  }
693  //We cannot remove the page element so if it has only one childnode, we remove that childnode instead
694  if ((xmlStrcasecmp(N_parent->name,BAD_CAST "page") == 0) && (xmlLsCountNode(N_parent) == 1)) {
695    N_cur = N_parent->children->children;
696    while (N_cur){
697      N_next = N_cur->next;
698      xmlUnlinkNode(N_cur);
699      xmlAddChild(N_parent, N_cur);
700      N_cur = N_next;
701    }
702    xmlUnlinkNode(N_parent->children);
703  }
704  //Ok, so by this time the N_parent and his children are guaranteed to be clean
705  //this for loop gets information from the 'word' elements and propagates it up
706  //the tree.
707  if (aggregateInfo && xmlStrcasecmp(N_parent->name,BAD_CAST "word") != 0) {
708    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next){
709      val = xmlGetProp(N_cur,BAD_CAST "style");
710      stylePos = xmlXPathCastStringToNumber(val);
711      //fprintf(stderr,"1: %f, %d\n",stylePos,int(stylePos));
712      styles[int(stylePos)]=styles[int(stylePos)]+1;
713      //fprintf(stderr,"2: styles[%d] = %d\n",int(stylePos),styles[int(stylePos)]);
714      (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X1")) < tX1 || tX1 == -1)? tX1 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X1")) : tX1 = tX1;
715      (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X2")) > tX2)             ? tX2 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X2")) : tX2 = tX2;
716      (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y1")) < tY1 || tY1 == -1)? tY1 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y1")) : tY1 = tY1;
717      (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y2")) > tY2)             ? tY2 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y2")) : tY2 = tY2;
718    }
719    sprintf(buf, "%f", tX1);     xmlSetProp(N_parent, BAD_CAST "X1", BAD_CAST buf);
720    sprintf(buf, "%f", tX2);     xmlSetProp(N_parent, BAD_CAST "X2", BAD_CAST buf);
721    sprintf(buf, "%f", tY1);     xmlSetProp(N_parent, BAD_CAST "Y1", BAD_CAST buf);
722    sprintf(buf, "%f", tY2);     xmlSetProp(N_parent, BAD_CAST "Y2", BAD_CAST buf);
723    sprintf(buf, "%f", tX2-tX1); xmlSetProp(N_parent, BAD_CAST "width", BAD_CAST buf);
724    sprintf(buf, "%f", tY2-tY1); xmlSetProp(N_parent, BAD_CAST "height", BAD_CAST buf);
725    prevStyle = 0;
726    styles[0] = -1;
727    for (int i=1; i< styleLength; i++) { if (styles[i] > styles[prevStyle]) prevStyle = i; }
728    //fprintf(stderr,"%d\n", prevStyle);
729    if (prevStyle > 0){
730      sprintf(buf, "%d", prevStyle);     xmlSetProp(N_parent, BAD_CAST "style", BAD_CAST buf);
731    }
732  }
733  if (N_parent->children && xmlStrcasecmp(N_parent->children->name,BAD_CAST "line") == 0 && xmlGetProp(N_parent->children,BAD_CAST "alignment") != NULL)
734    xmlSetProp(N_parent, BAD_CAST "alignment", xmlGetProp(N_parent->children,BAD_CAST "alignment"));
735
736   delete styles;
737}
738
739void ABWOutputDev::generateParagraphs() {
740  xmlNodePtr N_cur, N_parent, N_p, N_line, N_next;
741  int lvl;
742  //basically I first detect the text-alignment within blocks.
743  //ASSUMPTION: my block seperation thing is good enough so I don't need to
744  //worry about two alignments in one paragraph
745 
746  X1 = 0;
747  X2 = pdfdoc->getPageCropWidth(G_pageNum);
748  Y1 = 0;
749  Y2 = pdfdoc->getPageCropHeight(G_pageNum);
750  addAlignment(N_page);
751 
752  //then it's a switch per alignement
753  N_cur = N_page->children;
754  N_parent = N_page;
755  lvl = 1;
756  while (N_cur) {
757    if (xmlStrcasecmp(N_cur->name,BAD_CAST "chunk") == 0){
758      N_p = xmlNewNode(NULL, BAD_CAST "chunk");
759      xmlAddPrevSibling(N_cur,N_p);
760      //N_p = xmlNewChild(N_parent, NULL, BAD_CAST "chunk", NULL);
761      //A new paragraph is created when:
762      switch (int(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "alignment")))){
763      //left
764      case 1: //the distance between the texblock X2 and the last word X2 is more than
765         //the following first word width.
766         N_line = N_cur->children;
767         while (N_line){
768           N_next = N_line->next;
769           xmlUnlinkNode(N_line);
770           xmlAddChild(N_p,N_line);
771           xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "1");
772           if (N_next && xmlStrcasecmp(N_next->name,BAD_CAST "line") == 0){
773             if (xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")) < (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")) - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")))){
774               N_p = xmlNewNode(NULL, BAD_CAST "chunk");
775               xmlAddPrevSibling(N_cur,N_p);
776             }
777           }
778           N_line = N_next;
779         }
780         break;
781      //right
782      case 2: //the same but now with X1 and first word and following last word
783         N_line = N_cur->children;
784         while (N_line){
785           N_next = N_line->next;
786           xmlUnlinkNode(N_line);
787           xmlAddChild(N_p,N_line);
788           xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "2");
789           if (N_next && xmlStrcasecmp(N_next->name,BAD_CAST "line") == 0){
790             //fprintf(stderr,"width_next=%f, X2_bl=%f, X2_w=%f\n",xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")));
791             if (xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")) < (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")) - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")))){
792               N_p = xmlNewNode(NULL, BAD_CAST "chunk");
793               xmlAddPrevSibling(N_cur,N_p);
794             }
795           }
796           N_line = N_next;
797         }
798         break;
799      //centered
800      case 3: //the combined left and right space is more than the following first word
801         N_line = N_cur->children;
802         while (N_line){
803           N_next = N_line->next;
804           xmlUnlinkNode(N_line);
805           xmlAddChild(N_p,N_line);
806           xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "3");
807           if (N_next && xmlStrcasecmp(N_next->name,BAD_CAST "line") == 0){
808             //fprintf(stderr,"width_next=%f, X2_bl=%f, X2_w=%f\n",xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")));
809             if (xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")) < (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")) - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")))){
810               N_p = xmlNewNode(NULL, BAD_CAST "chunk");
811               xmlAddPrevSibling(N_cur,N_p);
812             }
813           }
814           N_line = N_next;
815         }
816         break;
817      //justified
818      case 4:
819         //we break on all alignment=1 lines. A line with alignment=1 that is the first of a block will
820         //also initiate a paragraph break before.
821         N_line = N_cur->children;
822         if (xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "alignment")) == 1){
823           N_p = xmlNewNode(NULL, BAD_CAST "chunk");
824           xmlAddPrevSibling(N_cur,N_p);
825         }
826         while (N_line){
827           N_next = N_line->next;
828           xmlUnlinkNode(N_line);
829           xmlAddChild(N_p,N_line);
830           if (xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "alignment")) == 1){
831             N_p = xmlNewNode(NULL, BAD_CAST "chunk");
832             xmlAddPrevSibling(N_cur,N_p);
833           }
834           xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "4");
835           N_line = N_next;
836         }
837         break;
838      }
839    }
840    else if (xmlStrcasecmp(N_cur->name,BAD_CAST "colset") == 0 || xmlStrcasecmp(N_cur->name,BAD_CAST "column") == 0){
841      N_parent = N_cur;
842      N_cur = N_cur->children;
843      lvl++;
844      N_p = xmlNewNode(NULL, BAD_CAST "chunk");
845      xmlAddPrevSibling(N_cur,N_p);
846      continue;
847    }
848    if (N_cur->next)
849      N_cur = N_cur->next;
850    else while (lvl > 0){
851      N_cur = N_parent;
852      N_parent = N_cur->parent;
853      lvl--;
854      if (N_cur->next){
855        N_cur = N_cur->next;
856        break;
857      }
858    }
859    if (lvl==0)
860      N_cur = NULL;
861  }
862}
863
864//function that adds an 'alignment=' property to the <chunk>s
865void ABWOutputDev::addAlignment(xmlNodePtr N_parent) {
866  xmlNodePtr N_chunk, N_line;
867  double tX1, tX2;
868  bool leftMatch, rightMatch, centerMatch;
869  int leftCnt = 0, rightCnt = 0, cntrCnt = 0, justCnt = 0;
870  //fprintf(stderr,"Entering addAlignment\n");
871  for (N_chunk = N_parent->children; N_chunk; N_chunk = N_chunk->next) {
872    if (xmlStrcasecmp(N_chunk->name,BAD_CAST "chunk") == 0){
873      X1 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"));
874      X2 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"));
875      //fprintf(stderr,"Found chunk\n");
876      //if the chunk contains only one line, we don't need to loop through it.
877      if (xmlLsCountNode(N_chunk) == 1){
878        //fprintf(stderr,"Processing line\n");
879        //fprintf(stderr,"X1=%f, X2=%f, cX1=%f, cX2=%f\n",X1,X2,xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")), xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2")));
880        //fprintf(stderr,"%f\n",(xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")) - X1)-(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"))));
881        //fprintf(stderr,"cX1-X1=%f, X2-cX2=%f\n",(xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")) - X1),(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"))));
882        // a one line chunk, is either centered or left or right-aligned.
883        if ((xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"))-X1)-(X2-xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"))) > 1) {
884          xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "2");
885          xmlNewProp(N_chunk->children, BAD_CAST "alignment", BAD_CAST "2");
886          //fprintf(stderr,"alignment = right\n");
887        }
888        else { 
889        if ((xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"))-X1)-(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2")))< -1) {
890          xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "1");
891          xmlNewProp(N_chunk->children, BAD_CAST "alignment", BAD_CAST "1");
892          //fprintf(stderr,"alignment = left\n");
893        }
894        else {
895          xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "3");
896          xmlNewProp(N_chunk->children, BAD_CAST "alignment", BAD_CAST "3");
897          //fprintf(stderr,"alignment = center\n");
898        }
899        }
900      }
901      else {
902      leftCnt = 0;
903      rightCnt = 0;
904      cntrCnt = 0;
905      justCnt = 0;
906      for (N_line = N_chunk->children; N_line; N_line = N_line->next) {
907        //fprintf(stderr,"Processing line\n");
908        /*
909        |X1 - cX1| == 1
910        |X2 - cX2| == 1
911        |(cX1-X1)-(X2-cX2)| == 1
912        ok, each line can be just as wide as the current set,
913        it can be smaller and moved to the right
914        it can be smaller and moved to the left.
915        it can
916        */
917        //fprintf(stderr,"X1=%f, X2=%f, cX1=%f, cX2=%f\n",X1,X2,xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1")), xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2")));
918        //fprintf(stderr,"cX1-X1=%f, X2-cX2=%f\n",(xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1")) - X1),(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2"))));
919        leftMatch =  fabs(xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1"))-X1) < 2;
920        rightMatch =  fabs(X2-xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2"))) < 2;
921        centerMatch =  fabs((xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1"))-X1)-(X2-xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2")))) < 2;
922        if (leftMatch && rightMatch) {
923          xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "4");
924          justCnt++;
925        }
926        else if (centerMatch) {
927          xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "3");
928          cntrCnt++;
929        }
930        else if (rightMatch) {
931          xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "2");
932          rightCnt++;
933        }
934        else {
935          xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "1");
936          leftCnt++;
937        }
938      }
939      //there is almost always one justified line in a centered text
940      //and most justified blocks have at least one left aligned line
941      //fprintf(stderr,"1:%d ,2:%d ,3:%d ,4:%d\n",leftCnt,justCnt,cntrCnt,rightCnt);
942      if ((leftCnt-1 >= justCnt) && (leftCnt >= rightCnt) && (leftCnt >= cntrCnt))
943        xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "1");
944      else if ((justCnt >= leftCnt-1) && (justCnt >= rightCnt) && (justCnt >= cntrCnt))
945        xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "4");
946      else if ((cntrCnt >= justCnt-1) && (cntrCnt >= rightCnt) && (cntrCnt >= leftCnt))
947        xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "3");
948      else
949        xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "2");
950      }
951    } 
952    else {
953      if (xmlStrcasecmp(N_chunk->name,BAD_CAST "colset") == 0){
954        //fprintf(stderr,"Found a colset\n");
955        addAlignment(N_chunk);
956      }
957      else {
958        if (xmlStrcasecmp(N_chunk->name,BAD_CAST "column") == 0){
959          //fprintf(stderr,"Found a column\n");
960          tX1 = X1;
961          tX2 = X2;
962          X1 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"));
963          X2 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"));
964          addAlignment(N_chunk);
965          X1 = tX1;
966          X2 = tX2;
967        }
968        else { //fprintf(stderr,"Found something else\n");
969        }
970      }
971    }
972  }
973//parse all blocks, and all lines within all blocks
974//do a set of checks and tick a flag if the check fails
975//check for line X1 is textBlock X1
976//check for line X2 is textblock X2
977//check if line is centered in textBock (LX1 != TX1 && LX2 != TX2 && LX1-TX1 == TX2=LX2)
978//if the LX1 != TX1 then how much is the difference?
979//a line isn't left aligned if all lines have a different X1 <= not so strong assumption.
980
981//justified if both are straight except for a couple of (same factor sized) indents at the left
982//else centered if above calculation is correct
983//else left aligned if left side is more straight than right (more lines in the same X1 or common factor
984//else right
985}
986
987void ABWOutputDev::setPDFDoc(PDFDoc *priv_pdfdoc) {
988  pdfdoc = priv_pdfdoc;
989}
990
991void ABWOutputDev::createABW() {
992  //*************************************************************
993  //change styles to abiword format
994  xmlNodePtr N_cur, N_next;
995  xmlAttrPtr N_prop;
996  char buf[500];
997  for (N_cur = N_styleset->children; N_cur; N_cur = N_cur->next){
998    sprintf(buf,"margin-top:0pt; color:000000; margin-left:0pt; text-position:normal; widows:2; text-indent:0in; font-variant:normal; margin-right:0pt; lang:nl-NL; line-height:1.0; font-size:%dpt; text-decoration:none; margin-bottom:0pt; bgcolor:transparent; text-align:left; font-stretch:normal;",int(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "size"))));
999    strncat(buf,"font-family:",12);
1000    strncat(buf,(char *)xmlGetProp(N_cur,BAD_CAST "font"),strlen((char *)xmlGetProp(N_cur,BAD_CAST "font")));
1001    strncat(buf,";",1);
1002    strncat(buf,"font-weight:",12);
1003    strncat(buf,(char *)xmlGetProp(N_cur,BAD_CAST "bold"),strlen((char *)xmlGetProp(N_cur,BAD_CAST "bold")));
1004    strncat(buf,"font-style:",12);
1005    strncat(buf,(char *)xmlGetProp(N_cur,BAD_CAST "italic"),strlen((char *)xmlGetProp(N_cur,BAD_CAST "italic")));
1006    xmlSetProp(N_cur, BAD_CAST "props", BAD_CAST buf);
1007    N_prop = xmlHasProp(N_cur, BAD_CAST "id");
1008    if (N_prop != NULL) xmlRemoveProp(N_prop);
1009    N_prop = xmlHasProp(N_cur, BAD_CAST "size");
1010    if (N_prop != NULL) xmlRemoveProp(N_prop);
1011    N_prop = xmlHasProp(N_cur, BAD_CAST "bold");
1012    if (N_prop != NULL) xmlRemoveProp(N_prop);
1013    N_prop = xmlHasProp(N_cur, BAD_CAST "italic");
1014    if (N_prop != NULL) xmlRemoveProp(N_prop);
1015    N_prop = xmlHasProp(N_cur, BAD_CAST "font");
1016    if (N_prop != NULL) xmlRemoveProp(N_prop);
1017  }
1018  //*************************************************************
1019  //Change the rest of the document
1020  //each child of N_content is a page
1021  N_cur = N_content->children;
1022  while (N_cur){
1023    //we creat a section node and attach it to the root, it will com after all
1024    //the page nodes. Then we transform the page, and finally remove it
1025    N_next = N_cur->next;
1026    //fprintf(stderr,"***Transforming page\n");
1027    N_Block = xmlNewChild(N_root, NULL, BAD_CAST "section", NULL);
1028    transformPage(N_cur);
1029    xmlUnlinkNode(N_cur);
1030    //fprintf(stderr,"***Finished transforming page\n");
1031    N_cur = N_next;
1032  }
1033  cleanUpNode(N_root, false);
1034}
1035
1036void ABWOutputDev::transformPage(xmlNodePtr N_parent){
1037  char buf[60];
1038  xmlNodePtr N_cur, N_curLine, N_curText, N_curWord, text, space;
1039  //translate the nodes into abiword nodes
1040  if (xmlStrcasecmp(N_parent->name,BAD_CAST "page") == 0){
1041    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next){
1042      //fprintf(stderr,"**pass a page child\n");
1043      transformPage(N_cur);
1044    }
1045  }
1046  if (xmlStrcasecmp(N_parent->name,BAD_CAST "chunk") == 0){
1047    //fprintf(stderr,"Found a chunk\n");
1048    //I start a <p> on each chunk and add all word containment
1049    N_text = xmlNewChild(N_Block, NULL, BAD_CAST "p", NULL);
1050    if (int(xmlXPathCastStringToNumber(xmlGetProp(N_parent,BAD_CAST "style"))) > 0){
1051      xmlNewProp(N_text, BAD_CAST "style", xmlGetProp(N_parent,BAD_CAST "style"));
1052    }
1053    switch (int(xmlXPathCastStringToNumber(xmlGetProp(N_parent,BAD_CAST "alignment")))){
1054    case 1: xmlNewProp(N_text, BAD_CAST "props", BAD_CAST "text-align:left");
1055           break;
1056    case 2: xmlNewProp(N_text, BAD_CAST "props", BAD_CAST "text-align:right");
1057           break;
1058    case 3: xmlNewProp(N_text, BAD_CAST "props", BAD_CAST "text-align:center");
1059           break;
1060    case 4: xmlNewProp(N_text, BAD_CAST "props", BAD_CAST "text-align:justify");
1061           break;
1062    }
1063    for (N_curLine = N_parent->children; N_curLine; N_curLine = N_curLine->next){
1064      //fprintf(stderr,"A line\n");
1065      for (N_curText = N_curLine->children; N_curText; N_curText = N_curText->next){
1066        //fprintf(stderr,"a textNode\n");
1067        for (N_curWord = N_curText->children; N_curWord; N_curWord = N_curWord->next){
1068          //fprintf(stderr,"a word\n");
1069          text = N_curWord->children;
1070          xmlUnlinkNode(text);
1071          xmlAddChild(N_text,text);
1072          space = xmlNewText(BAD_CAST " ");
1073          xmlAddChild(N_text,space);
1074        }
1075      }
1076    }
1077  }
1078  if (xmlStrcasecmp(N_parent->name,BAD_CAST "column") == 0){
1079    //fprintf(stderr,"Found a column\n");
1080    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next){
1081      transformPage(N_cur);
1082    }
1083    xmlNewChild(N_text, NULL, BAD_CAST "cbr", NULL);
1084  }
1085  if (xmlStrcasecmp(N_parent->name,BAD_CAST "colset") == 0){
1086    //fprintf(stderr,"Found a colset\n");
1087    //create new section columns: count childNodes of N_cur
1088    //recurse through chunks and create textNodes
1089    N_Block = xmlNewChild(N_root, NULL, BAD_CAST "section", NULL);
1090    sprintf(buf,"columns:%d",xmlLsCountNode(N_parent));
1091    xmlNewProp(N_Block, BAD_CAST "props", BAD_CAST buf);
1092    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next){
1093      transformPage(N_cur);
1094    }
1095    N_Block = xmlNewChild(N_root, NULL, BAD_CAST "section", NULL);
1096  }
1097  //fprintf(stderr,"at the end\n");
1098}
1099
1100//Count nodes, copied from debugxml.c from libxml
1101// libxml copyright file below
1102/*
1103Except where otherwise noted in the source code (e.g. the files hash.c,
1104list.c and the trio files, which are covered by a similar licence but
1105with different Copyright notices) all the files are:
1106
1107 Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
1108
1109Permission is hereby granted, free of charge, to any person obtaining a copy
1110of this software and associated documentation files (the "Software"), to deal
1111in the Software without restriction, including without limitation the rights
1112to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1113copies of the Software, and to permit persons to whom the Software is fur-
1114nished to do so, subject to the following conditions:
1115
1116The above copyright notice and this permission notice shall be included in
1117all copies or substantial portions of the Software.
1118
1119THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1120IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
1121NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
1122DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
1123IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CON-
1124NECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1125
1126Except as contained in this notice, the name of Daniel Veillard shall not
1127be used in advertising or otherwise to promote the sale, use or other deal-
1128ings in this Software without prior written authorization from him.
1129*/
1130int ABWOutputDev::xmlLsCountNode(xmlNodePtr node) {
1131  int ret = 0;
1132  xmlNodePtr list = NULL;
1133
1134  if (node == NULL)
1135    return(0);
1136
1137  switch (node->type) {
1138    case XML_ELEMENT_NODE:
1139      list = node->children;
1140      break;
1141    case XML_DOCUMENT_NODE:
1142    case XML_HTML_DOCUMENT_NODE:
1143#ifdef LIBXML_DOCB_ENABLED
1144    case XML_DOCB_DOCUMENT_NODE:
1145#endif
1146      list = ((xmlDocPtr) node)->children;
1147      break;
1148    case XML_ATTRIBUTE_NODE:
1149      list = ((xmlAttrPtr) node)->children;
1150      break;
1151    case XML_TEXT_NODE:
1152    case XML_CDATA_SECTION_NODE:
1153    case XML_PI_NODE:
1154    case XML_COMMENT_NODE:
1155      if (node->content != NULL) {
1156        ret = xmlStrlen(node->content);
1157      }
1158      break;
1159    case XML_ENTITY_REF_NODE:
1160    case XML_DOCUMENT_TYPE_NODE:
1161    case XML_ENTITY_NODE:
1162    case XML_DOCUMENT_FRAG_NODE:
1163    case XML_NOTATION_NODE:
1164    case XML_DTD_NODE:
1165    case XML_ELEMENT_DECL:
1166    case XML_ATTRIBUTE_DECL:
1167    case XML_ENTITY_DECL:
1168    case XML_NAMESPACE_DECL:
1169    case XML_XINCLUDE_START:
1170    case XML_XINCLUDE_END:
1171      ret = 1;
1172      break;
1173  }
1174  for (;list != NULL;ret++) 
1175    list = list->next;
1176  return(ret);
1177}
Note: See TracBrowser for help on using the repository browser.