source: trunk/poppler/mypoppler/poppler/ABWOutputDev.cc @ 461

Last change on this file since 461 was 277, checked in by rbri, 12 years ago

PDF plugin: Poppler library updated to version 0.12.3

File size: 45.6 KB
Line 
1//========================================================================
2//
3// ABWOutputDev.cc
4//
5// Copyright 2006-2007 Jauco Noordzij <jauco@jauco.nl>
6// Copyright 2007 Dominic Lachowicz <cinamod@hotmail.com>
7// Copyright 2008 Hib Eris <hib@hiberis.nl>
8//
9// Based somewhat on HtmlOutputDev.cc
10//
11//========================================================================
12
13#ifdef __GNUC__
14#pragma implementation
15#endif
16
17#include "config.h"
18#include <stdio.h>
19#include <stdlib.h>
20#include <stdarg.h>
21#include <stddef.h>
22#include <ctype.h>
23#include <math.h>
24#include "goo/GooString.h"
25#include "goo/GooList.h"
26#include "UnicodeMap.h"
27#include "goo/gmem.h"
28#include "Error.h"
29#include "GfxState.h"
30#include "GlobalParams.h"
31#include "ABWOutputDev.h"
32#include "PDFDoc.h"
33
34#include <libxml/parser.h>
35#include <libxml/tree.h>
36#include <libxml/xpath.h>
37#include <libxml/xpathInternals.h>
38
39
40// Inter-character space width which will cause addChar to start a new
41// word.
42#define minWordBreakSpace 0.1
43
44// Maximum inter-word spacing, as a fraction of the font size.
45#define maxWordSpacing 1.5
46
47// Max distance between baselines of two lines within a block, as a
48// fraction of the font size.
49#define maxLineSpacingDelta 1.5
50
51#define C_maxVCutValue 4
52#define C_maxHCutValue 5
53//------------------------------------------------------------------------
54// ABWOutputDev
55//------------------------------------------------------------------------
56
57ABWOutputDev::ABWOutputDev(xmlDocPtr ext_doc)
58{
59  pdfdoc = NULL;
60  N_page = N_style = N_text = N_styleset = N_Block = N_word = NULL;
61  doc = ext_doc;
62  N_root = xmlNewNode(NULL, BAD_CAST "abiword");
63  xmlDocSetRootElement(doc, N_root);
64  N_styleset = xmlNewChild(N_root, NULL, BAD_CAST "styles", NULL);
65  N_content = xmlNewChild(N_root, NULL, BAD_CAST "content", NULL);
66  uMap = globalParams->getTextEncoding();
67  maxStyle = Style = 1;
68}
69
70ABWOutputDev::~ABWOutputDev() {
71  xmlCleanupParser();
72}
73
74void ABWOutputDev::startPage(int pageNum, GfxState *state) {
75  /*While reading a pdf page this node acts as a placeholder parent.
76  when conversion is finished and the page is structured as we like it
77  all text fragments are moved from N_page to N_content.*/
78  N_page = xmlNewNode(NULL, BAD_CAST "page");
79  G_pageNum = pageNum;
80} 
81
82/*Callback to denote that poppler reached the end of a page
83here I insert most of the interesting processing stuff*/
84void ABWOutputDev::endPage() {
85  //make sure all words are closed
86  endTextBlock();
87  cleanUpNode(N_page, true);
88  //xmlAddChild(N_content, N_page);
89  //xmlSaveFormatFileEnc("pre-cut.xml", doc, "UTF-8", 1);
90  //xmlUnlinkNode(N_page);
91  //call the top down cutting mechanism
92  recursiveXYC(N_page);
93  //by stopping to worry about creating empty nodes I made the code quite a
94  //bit more robust. This function makes sure we have a nice'n'clean tree
95  cleanUpNode(N_page, true);
96  //xmlAddChild(N_content, N_page);
97  //xmlSaveFormatFileEnc("raw.xml", doc, "UTF-8", 1);
98  //xmlUnlinkNode(N_page);
99 
100  //Interpret the XY tree and infer text blocks and columns
101  interpretXYTree();
102  cleanUpNode(N_page, true);
103  //xmlAddChild(N_content, N_page);
104  //xmlSaveFormatFileEnc("interpreted.xml", doc, "UTF-8", 1);
105  //xmlUnlinkNode(N_page);
106 
107  //I have blocks and columns, this function will turn that into paragraphs and
108  //columns
109  generateParagraphs();
110  cleanUpNode(N_page, true);
111  xmlAddChild(N_content, N_page);
112  N_page = NULL;
113}
114
115void ABWOutputDev::recursiveXYC(xmlNodePtr nodeset) {
116  /*This function implements the recursive XY Cut. basically, it gets
117  the largest piece of whitespace (using getBiggestSeperator()) and then
118  splits the page using splitNodes on that whitespace. It calls itself again
119  with both the halves*/
120  float bhs, bvs, X1, X2, Y1, Y2;
121
122  bvs = getBiggestSeperator(nodeset, VERTICAL, &X1, &X2);
123  bhs = getBiggestSeperator(nodeset, HORIZONTAL, &Y1, &Y2);
124 
125  if (bvs == -1){
126    if (bhs == -1){//both -1
127      //FIXME: add assertions that bvs and bhs are >=-1
128      printf("No seperators\n");
129      return;
130    }
131    else { //only bhs > -1
132      splitNodes(Y1, HORIZONTAL, nodeset, bhs);
133    }
134  }
135  else {
136    if (bhs == -1){//only bvs > -1
137      splitNodes(X1, VERTICAL, nodeset, bvs);
138    }
139    else {//both > -1
140      if (bvs >= (bhs/1.7)){
141        //When people read a text they prefer vertical cuts over horizontal
142        //ones. I'm not that sure about the 1.7 value, but it seems to work.
143        splitNodes(X1, VERTICAL, nodeset, bvs);
144      }
145      else {
146        splitNodes(Y1, HORIZONTAL, nodeset, bhs);
147      }
148    }
149  }
150  recursiveXYC(nodeset->children);
151  recursiveXYC(nodeset->children->next);
152}
153
154void ABWOutputDev::splitNodes(float splitValue, unsigned int direction, xmlNodePtr N_parent, double seperator){
155  //This function takes a nodeset and splits it based on a cut value. It returns
156  //the nodePtr with two childnodes, the both chunks.
157  xmlNodePtr N_move, N_cur, N_newH, N_newL;
158  char * propName;
159  const char *nodeName;
160  char buf[20];
161  if (direction == HORIZONTAL) {
162    propName = "Y1"; 
163    nodeName = "horizontal";
164  }
165  else { 
166    propName = "X1"; 
167    nodeName = "vertical";
168  }
169  N_newH = xmlNewNode(NULL, BAD_CAST nodeName);
170  N_newL = xmlNewNode(NULL, BAD_CAST nodeName);
171  sprintf(buf, "%f", seperator); 
172  xmlNewProp(N_newH, BAD_CAST "diff", BAD_CAST buf);
173  sprintf(buf, "%f", seperator); 
174  xmlNewProp(N_newL, BAD_CAST "diff", BAD_CAST buf);
175  N_cur = N_parent->children;
176  while (N_cur){
177    N_move = N_cur->next;
178    xmlUnlinkNode(N_cur);
179    if (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST propName)) > splitValue){
180      xmlAddChild(N_newH, N_cur);
181    }
182    else {
183      xmlAddChild(N_newL, N_cur);
184    }
185    N_cur = N_move;
186  }
187  xmlAddChild(N_parent, N_newL);
188  xmlAddChild(N_parent, N_newH);
189}
190
191float ABWOutputDev::getBiggestSeperator(xmlNodePtr N_set, unsigned int direction, float * C1, float * C2)
192{
193  int i = 0;
194  int nodeCount = xmlLsCountNode(N_set);
195  float store;
196  int min;
197  float gap, endV;
198  float * stt;
199  float * end;
200  if (nodeCount == 0){
201    //Add assertion that this shouldn't happen
202    fprintf(stderr,"No child nodes");
203    return -1;
204  }
205  stt = new float[nodeCount];
206  end = new float[nodeCount];
207  //store all variables in two arrays (one for start, one for end coordinates)
208  if (direction == VERTICAL) {
209    for (xmlNodePtr N_cur = N_set->children; N_cur != NULL; N_cur = N_cur->next){
210      stt[i] = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X1"));
211      end[i] = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X2"));
212      i++;
213    }
214  }
215  else {
216    for (xmlNodePtr N_cur = N_set->children; N_cur != NULL; N_cur = N_cur->next){
217      stt[i] = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y1"));
218      end[i] = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y2"));
219      i++;
220    }
221  }
222  //Sort them
223  for (i = 0; i < nodeCount - 1; i++){
224    min = i;
225    for (int j = i + 1; j < nodeCount; j++)
226      if (stt[j] < stt[i])
227        min = j;
228    store = stt[i];
229    stt[i] = stt[min];
230    stt[min] = store;
231    store = end[i];
232    end[i] = end[min];
233    end[min] = store;
234  }
235  //find the largest gap
236  gap = -1;
237  endV = end[0];
238  *C1 = 0;
239  *C2 = 0;
240  for (int inspect = 1; inspect < nodeCount; inspect++){
241    //no gap
242    if (((stt[inspect] - endV) - gap) < 0.5){ //FIXME:This is copied almost directly from the previous function, needs checking out
243      //partial overlap instead of complete one
244      if (end[inspect] > endV)
245        endV = end[inspect];
246    }
247    //gap
248    else{
249      //gap is larger than any previous gap
250      if (gap < (stt[inspect] - endV)){
251        gap = stt[inspect] - endV;
252        *C1 = endV;
253        *C2 = stt[inspect];
254      }
255      endV = end[inspect];
256    }
257  }
258  delete[] stt;
259  delete[] end;
260  return gap;
261}
262
263void ABWOutputDev::updateFont(GfxState *state) {
264  char buf[160];
265  xmlNodePtr N_cur;
266  GfxFont *font;
267  bool found = false;
268  bool isBold, isItalic, S_isBold, S_isItalic;
269  isBold = isItalic = S_isBold =  S_isItalic = false;
270  font = state->getFont();
271  GooString *ftName;
272  char *fnEnd, *fnName;
273  int fnStart, ftSize;
274  //the first time this function is called there is no funt.
275  //Fixme: find out if that isn'y a bug
276  if (font){
277    isBold = (font->isBold() || font->getWeight() >6 || (strstr(font->getOrigName()->getCString(), "Bold")-font->getOrigName()->getCString() == (font->getOrigName()->getLength()-4)));
278    isItalic =  (font->isItalic() || (strstr(font->getOrigName()->getCString(), "Italic")-font->getOrigName()->getCString() == (font->getOrigName()->getLength()-6)));
279    ftSize = int(state->getTransformedFontSize())-1;
280    ftName = new GooString(font->getOrigName());
281    fnStart = strcspn(ftName->getCString(), "+");
282    if (fnStart < ftName->getLength())
283      ftName->del(0,fnStart+1);
284    fnEnd = strrchr(ftName->getCString(), 44);
285    if (fnEnd == 0)
286      fnEnd = strrchr(ftName->getCString(), 45);
287    if (fnEnd != 0)
288      ftName->del(fnEnd-ftName->getCString(),ftName->getLength()-1);
289   
290/*    fnName = ftName;
291    if (isBold or isItalic){
292      fnStart = strcspn(fnName, "+");
293      if (fnStart == font->getOrigName()->getLength())
294        fnStart = 0;
295      else fnStart++;
296
297      fnEnd = strstr(fnName, ",");
298      if (fnEnd == 0)
299        fnEnd = strstr(fnName, "-");
300      if (fnEnd != 0)
301        fnName[fnEnd-fnName] = 0;
302//      char fntName[fnLength];
303//      strncpy (fntName,fnName+fnStart+1,fnLength);
304      fnName+=fnStart;
305//      fnName = fntName;
306    }
307    else {*/
308      fnName = ftName->getCString();
309//    }
310    for (N_cur = N_styleset->children; N_cur; N_cur = N_cur ->next){
311      if (
312       isBold == (xmlStrcasecmp(xmlGetProp(N_cur,BAD_CAST "bold"),BAD_CAST "bold;") == 0)
313       &&
314       isItalic == (xmlStrcasecmp(xmlGetProp(N_cur,BAD_CAST "italic"),BAD_CAST "italic") == 0)
315       &&
316       xmlStrcasecmp(xmlGetProp(N_cur,BAD_CAST "font"),BAD_CAST fnName) == 0
317       &&
318       xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "size")) == ftSize
319      ) {
320        found = true;
321        Style = int(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "id")));
322      }
323    }
324    if (!found){
325      N_cur = xmlNewChild(N_styleset, NULL, BAD_CAST "s", NULL);
326      xmlSetProp(N_cur, BAD_CAST "type", BAD_CAST "P");
327      sprintf(buf, "%d", maxStyle++);
328      xmlSetProp(N_cur, BAD_CAST "name", BAD_CAST buf);
329      xmlSetProp(N_cur, BAD_CAST "id", BAD_CAST buf);
330      Style = maxStyle;
331      sprintf(buf, "%d", ftSize); xmlSetProp(N_cur, BAD_CAST "size", BAD_CAST buf);
332      isBold   ? xmlSetProp(N_cur, BAD_CAST "bold", BAD_CAST "bold;")  : xmlSetProp(N_cur, BAD_CAST "bold", BAD_CAST "normal;");
333      isItalic ? xmlSetProp(N_cur, BAD_CAST "italic", BAD_CAST "italic"): xmlSetProp(N_cur, BAD_CAST "italic", BAD_CAST "normal");
334      xmlSetProp(N_cur, BAD_CAST "font", BAD_CAST fnName);
335    }
336  }
337}
338
339void ABWOutputDev::drawChar(GfxState *state, double x, double y,
340                        double dx, double dy,
341                        double originX, double originY,
342                        CharCode code, int nBytes, Unicode *u, int uLen)
343{
344  //I wouldn't know what size this should safely be. I guess 64 bytes should be
345  //enough for any unicode character
346  char buf[64];
347  int charLen;
348  x = dx;
349  y = dy;
350  //state->textTransformDelta(dx * state->getHorizScaling(), dy, &dx, &dy);
351  //state->transformDelta(dx, dy, &dx, &dy);
352  if (uLen == 1 && code == 0x20) {
353    //If we break a text sequence on space, then the X1 should be increased
354    //but the Y1 and Y2 should remain the same.
355    beginWord(state,X2+dx,Y2);
356  }
357  else {
358    X2    += dx;
359    Y2    += dy;
360    charLen = uMap->mapUnicode(*u,buf,sizeof(buf));
361    //Getting Unicode to libxml is something I need to fix.
362    //simply passing it using a bad-cast isn't working.
363    //I assume that CharCode code it the U+value of the unicode character
364    //But for a ligature code gives me DF which is the ringel-s, I guess
365    //code should be two bytes wide?
366    xmlNodeAddContentLen(N_word, BAD_CAST buf, charLen);
367  }
368}
369
370void ABWOutputDev::beginString(GfxState *state, GooString *s) {
371  double x,y;
372  //state->textTransform(x, y, &x, &y);
373  state->transform(state->getCurX(), state->getCurY(), &x, &y);
374  if (N_word) {
375    verDist = y-Y2;
376    horDist = x-X2;
377    //TEST:changed fabs(horDist) to horDist
378    //FIXME: this if statement seems awkward to me.
379    if (horDist > (state->getTransformedFontSize()*maxWordSpacing) || (fabs(verDist) > (state->getTransformedFontSize()/maxLineSpacingDelta))) {
380      beginTextBlock(state,x,y);
381    }
382    else {
383      if ((horDist > (state->getTransformedFontSize()*minWordBreakSpace)) || (fabs(verDist) > (state->getTransformedFontSize()/maxLineSpacingDelta))) {
384        beginWord(state,x,y);
385      }
386    }
387  }
388  else {
389  //This is the first word. Clear all values and call beginWord;
390    X2 = x;
391    Y2 = y;
392    horDist = 0;
393    verDist = 0;
394    height  = 0;
395    beginTextBlock(state,x,y);
396  }
397}
398
399void ABWOutputDev::endString(GfxState *state) {
400
401}
402
403void ABWOutputDev::beginWord(GfxState *state, double x, double y){
404  char buf[20];
405//  printf("***BREAK!***\n");
406  endWord();
407  X1 = x;
408  Y2 = y;
409
410  horDist = X1-X2;
411  verDist = Y1-Y2;
412
413  X2 = X1;
414  height = state->getFont()->getAscent() * state->getTransformedFontSize();
415  Y1 = Y2-height;
416
417  N_word = xmlNewChild(N_Block, NULL, BAD_CAST "word", NULL);
418  sprintf(buf, "%f", X1); xmlNewProp(N_word, BAD_CAST "X1", BAD_CAST buf);
419  sprintf(buf, "%f", Y1); xmlNewProp(N_word, BAD_CAST "Y1", BAD_CAST buf);
420  sprintf(buf, "%d", Style); xmlNewProp(N_word, BAD_CAST "style", BAD_CAST buf);
421}
422
423void ABWOutputDev::endWord(){
424  char buf[20];
425  if (N_word) {
426    sprintf(buf, "%f", X2);    xmlNewProp(N_word, BAD_CAST "X2", BAD_CAST buf);
427    sprintf(buf, "%f", Y2);    xmlNewProp(N_word, BAD_CAST "Y2", BAD_CAST buf);
428    sprintf(buf, "%f", X2-X1); xmlNewProp(N_word, BAD_CAST "width", BAD_CAST buf);
429    sprintf(buf, "%f", Y2-Y1); xmlNewProp(N_word, BAD_CAST "height", BAD_CAST buf);
430    N_word = NULL;
431  }
432}
433
434void ABWOutputDev::beginTextBlock(GfxState *state, double x, double y){
435  endTextBlock();
436  N_Block = xmlNewChild(N_page, NULL, BAD_CAST "Textblock", NULL);
437  beginWord(state,x,y);
438}
439
440void ABWOutputDev::endTextBlock(){
441  if (N_Block) {
442    endWord();
443    N_Block = NULL; 
444  }
445}
446/*
447This will be a function to retrieve coherent text blocks from the chunk tree.*/
448void ABWOutputDev::interpretXYTree(){
449  xmlNodePtr N_oldPage;
450  N_oldPage = N_page;
451  N_page = xmlNewNode(NULL, BAD_CAST "page");
452  N_column = N_page;
453  //xmlAddChild(N_content, N_page);
454  N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
455  ATP_recursive(N_oldPage);
456}
457
458void ABWOutputDev::ATP_recursive(xmlNodePtr N_parent){
459  xmlNodePtr N_first, N_second, N_line, N_tempCol, N_tempColset = NULL;
460
461  N_first  = N_parent->children;
462  if (!N_first)
463    return;
464
465  N_second = N_first->next;
466/*
467  Possibilities:
468  there is one child node
469    Because we cleaned up before the only case where we allow one childnode is
470    within Textblocks and textBlocks within 'vertical' nodes.
471      basically one text node means: add it to the current block.
472  There are two childnodes
473    This can be two verticals, two horizontals or one horizontal and a text node.
474    verticals:
475      If the first is vertical, the second is as well.
476      verticals mean: create a new Block, add a column per vertical make the
477      vertical the block and recurse inside.
478      then make the second vertical the block and recurse inside
479      then finish the block (ie. create a new one)
480    horizontal and or Textblocks
481        if first is textnode
482          add first to block
483          if second is textnode
484            at to block
485          else
486            call again
487        else
488          begin new block
489            call again
490          begin new block
491          if second is text node
492            add to block
493          else
494            call again
495  there are more then two child nodes
496    this can be a number of Textblocks and horizontals
497    add the textNodes to the current Block
498    if a horizontal is encountered enter it and generate a new block afterwards
499  */
500  //fprintf(stderr,"**********************************************************************\n");
501  //xmlSaveFormatFileEnc("-", doc, "UTF-8", 1);
502  switch (xmlLsCountNode(N_parent)) {
503  case 1:
504    //fprintf(stderr,"case 1\n");
505    N_line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
506    xmlUnlinkNode(N_first);
507    xmlAddChild(N_line, N_first);
508    break;
509  case 2:
510    //fprintf(stderr,"case 2\n");
511    if (xmlStrcasecmp(N_first->name,BAD_CAST "vertical") == 0){
512      //store the column for the moment
513      N_tempCol = N_column;
514      /*If we have three columns they will turn up in the tree as:
515      <vertical>
516        <vertical/>
517        <vertical/>
518      </vertical>
519      <vertical/>
520      */
521      //if the parent is a vertical as well, we can skip the colset generation
522      //thing here we can also remove the just added column and block, because
523      //these are going to replace them
524      if (xmlStrcasecmp(N_parent->name,BAD_CAST "vertical") != 0){
525        //fprintf(stderr,"first time column\n");
526        N_tempColset = N_colset;
527        N_colset = xmlNewChild(N_column, NULL, BAD_CAST "colset", NULL);
528        N_column = xmlNewChild(N_colset, NULL, BAD_CAST "column", NULL);
529        N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
530      }
531      else {
532        //fprintf(stderr,"second time column\n");
533        xmlUnlinkNode(N_column);
534        N_column = xmlNewChild(N_colset, NULL, BAD_CAST "column", NULL);
535        N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
536      }
537      //fprintf(stderr,"Building first column...\n");
538      ATP_recursive(N_first);
539      N_column = xmlNewChild(N_colset, NULL, BAD_CAST "column", NULL);
540      N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
541      //fprintf(stderr,"Building second column...\n");
542      ATP_recursive(N_second);
543      //make sure we end the column by continuing in the master column and
544      //setting the block and line to it
545      N_column = N_tempCol;
546      if (xmlStrcasecmp(N_parent->name,BAD_CAST "vertical") != 0){
547        if (N_tempColset != NULL)
548          N_colset = N_tempColset;
549        else
550          fprintf(stderr,"N_templColset should not! be empty (line 823)");//FIXME: add assert
551      }
552    }
553    else {
554      if (xmlStrcasecmp(N_first->name,BAD_CAST "Textblock") == 0) {
555        //fprintf(stderr,"add first as textblock\n");
556        N_line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
557        xmlUnlinkNode(N_first);
558        xmlAddChild(N_line, N_first);
559        if (xmlStrcasecmp(N_second->name,BAD_CAST "Textblock") == 0) {
560          //fprintf(stderr,"add second as textblock\n");
561          //FIXME: this is not neat. We should ignore the cut ignoring when there are only two elements above
562          //line aggregation doesn't work anyway atm.
563          xmlUnlinkNode(N_second);
564          xmlAddChild(N_line, N_second);
565          //We have two textChunks that are going to be added to the line.
566          //the following statements make the line wrap around both textblocks
567          //if the firstX1 is smaller then the second X1 use the first, else use the second etc.
568        }
569        else {
570          //fprintf(stderr,"recursing into second\n");
571          ATP_recursive(N_second);
572        }
573      }
574      else {
575        N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
576        //fprintf(stderr,"recursing into first\n");
577        ATP_recursive(N_first);
578        N_Block = xmlNewChild(N_column, NULL, BAD_CAST "chunk", NULL);
579        if (xmlStrcasecmp(N_second->name,BAD_CAST "Textblock") == 0) {
580          //fprintf(stderr,"add second as textblock\n");
581          N_line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
582          xmlUnlinkNode(N_second);
583          xmlAddChild(N_line, N_second);
584        }
585        else {
586          //fprintf(stderr,"recursing into second\n");
587          ATP_recursive(N_second);
588        }
589      }
590    }
591    break;
592  default:
593    //double tX1=0, tX2=0, tY1=0, tY2=0;
594    //fprintf(stderr,"case default\n");
595    N_line = xmlNewChild(N_Block, NULL, BAD_CAST "line", NULL);
596    while (N_first){
597      //xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "X1")) < tX1 ? tX1 = xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "X1")) : tX1 = tX1;
598      //xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "X2")) > tX2 ? tX2 = xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "X2")) : tX2 = tX2;
599      //xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "Y1")) < tY1 ? tY1 = xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "Y1")) : tY1 = tY1;
600      //xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "Y2")) > tY2 ? tY2 = xmlXPathCastStringToNumber(xmlGetProp(N_first,BAD_CAST "Y2")) : tY1 = tY2;
601      N_second = N_first->next;
602      if (xmlStrcasecmp(N_first->name,BAD_CAST "Textblock") == 0){
603        xmlUnlinkNode(N_first);
604        xmlAddChild(N_line, N_first);
605      }
606      else { //fprintf(stderr,"This shouldn't happen! (line 700)\n");
607      }
608      N_first = N_second;
609    }
610    break;
611  }
612}
613
614/*The cleanup function. It started out as a simple function to remove empty nodes
615so that I could call xmladdnewchildnode as often as I liked so that I wouldn't get seg-faults
616It is now a bit more advanced, makes sure the tree is as it's supposed to be and adds information too*/
617void ABWOutputDev::cleanUpNode(xmlNodePtr N_parent, bool aggregateInfo){
618  double tX1=-1, tX2=-1, tY1=-1, tY2=-1;
619  xmlNodePtr N_cur, N_next;
620  N_cur = N_parent->children;
621  char buf[20];
622  int prevStyle = -1;
623  xmlChar *val;
624  int styleLength = xmlLsCountNode(N_styleset)+1;
625  float stylePos;
626  int *styles = new int[styleLength];
627  for (int i=1; i< styleLength; i++) { styles[i] = 0;}
628  /*
629  ignore two horizontal nodes with textBlocks right underneath them. They
630  signal the end of a chunk, and the horizontal seperation needs to be
631  preserved, because it means they are different lines. The second horizontal
632  therefore needs to be kept.
633  */
634  if ((xmlLsCountNode(N_parent) == 2)
635      &&
636     xmlStrcasecmp(N_parent->name,BAD_CAST "horizontal") == 0
637      && 
638     N_cur
639      &&
640     N_cur->next
641      &&
642     xmlStrcasecmp(N_cur->name,BAD_CAST "horizontal") == 0 && xmlStrcasecmp(N_cur->next->name,BAD_CAST "horizontal") == 0
643      &&
644     xmlLsCountNode(N_cur) == 1 && xmlLsCountNode(N_cur->next) == 1
645      &&
646     xmlStrcasecmp(N_cur->children->name,BAD_CAST "Textblock") == 0 && xmlStrcasecmp(N_cur->next->children->name,BAD_CAST "Textblock") == 0
647     ) {
648    xmlAddPrevSibling(N_cur->next,N_cur->children); 
649    xmlUnlinkNode(N_cur);
650  } 
651  /*
652  This removes columns if one of the parts is actually a single letter.
653  I found out I liked the columns better, so I have the code commented out.
654  */
655/*  else if ((xmlLsCountNode(N_parent) == 2)
656             &&
657            N_cur
658             &&
659            N_cur->next
660             &&
661            xmlStrcasecmp(N_cur->name,BAD_CAST "vertical") == 0
662             &&
663            xmlStrcasecmp(N_cur->next->name,BAD_CAST "vertical") == 0
664             &&
665            (N_cur->children)
666             &&
667            (N_cur->children->children)
668             &&
669            (N_cur->children->children->children)
670             &&
671            xmlStrlen(N_cur->children->children->children->content) == 1) {
672    N_next = N_cur->next;
673    xmlAddChild(N_parent, N_next->children);
674    xmlAddPrevSibling(N_next->children->children, N_cur->children);
675    xmlUnlinkNode(N_cur);
676    xmlUnlinkNode(N_next);
677  } */else {
678    while (N_cur){
679      N_next = N_cur->next;
680      cleanUpNode(N_cur, aggregateInfo);
681      if (xmlLsCountNode(N_cur) == 0 && (xmlStrcasecmp(N_cur->name,BAD_CAST "cbr") != 0) && (xmlStrcasecmp(N_cur->name,BAD_CAST "s") != 0))
682        xmlUnlinkNode(N_cur);
683      //If the node is still around
684      N_cur = N_next;
685    }
686  }
687  //If a countainer element has only one child, it can be removed except for vertical
688  //cuts with only one textElement;
689  //the main reason for this code is to remove the crumbs after cleaning up in the loop above
690  if ((xmlLsCountNode(N_parent) == 1) && ((xmlStrcasecmp(N_parent->name,BAD_CAST "horizontal") == 0) || ((xmlStrcasecmp(N_parent->name,BAD_CAST "vertical") == 0) && (xmlStrcasecmp(N_parent->children->name,BAD_CAST "Textblock") != 0)))){
691    N_cur = N_parent->children;
692    xmlAddPrevSibling(N_parent,N_cur);
693    xmlUnlinkNode(N_parent);
694  }
695  //We cannot remove the page element so if it has only one childnode, we remove that childnode instead
696  if ((xmlStrcasecmp(N_parent->name,BAD_CAST "page") == 0) && (xmlLsCountNode(N_parent) == 1)) {
697    N_cur = N_parent->children->children;
698    while (N_cur){
699      N_next = N_cur->next;
700      xmlUnlinkNode(N_cur);
701      xmlAddChild(N_parent, N_cur);
702      N_cur = N_next;
703    }
704    xmlUnlinkNode(N_parent->children);
705  }
706  //Ok, so by this time the N_parent and his children are guaranteed to be clean
707  //this for loop gets information from the 'word' elements and propagates it up
708  //the tree.
709  if (aggregateInfo && xmlStrcasecmp(N_parent->name,BAD_CAST "word") != 0) {
710    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next){
711      val = xmlGetProp(N_cur,BAD_CAST "style");
712      stylePos = xmlXPathCastStringToNumber(val);
713      //fprintf(stderr,"1: %f, %d\n",stylePos,int(stylePos));
714      styles[int(stylePos)]=styles[int(stylePos)]+1;
715      //fprintf(stderr,"2: styles[%d] = %d\n",int(stylePos),styles[int(stylePos)]);
716      (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X1")) < tX1 || tX1 == -1)? tX1 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X1")) : tX1 = tX1;
717      (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X2")) > tX2)             ? tX2 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "X2")) : tX2 = tX2;
718      (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y1")) < tY1 || tY1 == -1)? tY1 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y1")) : tY1 = tY1;
719      (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y2")) > tY2)             ? tY2 = xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "Y2")) : tY2 = tY2;
720    }
721    sprintf(buf, "%f", tX1);     xmlSetProp(N_parent, BAD_CAST "X1", BAD_CAST buf);
722    sprintf(buf, "%f", tX2);     xmlSetProp(N_parent, BAD_CAST "X2", BAD_CAST buf);
723    sprintf(buf, "%f", tY1);     xmlSetProp(N_parent, BAD_CAST "Y1", BAD_CAST buf);
724    sprintf(buf, "%f", tY2);     xmlSetProp(N_parent, BAD_CAST "Y2", BAD_CAST buf);
725    sprintf(buf, "%f", tX2-tX1); xmlSetProp(N_parent, BAD_CAST "width", BAD_CAST buf);
726    sprintf(buf, "%f", tY2-tY1); xmlSetProp(N_parent, BAD_CAST "height", BAD_CAST buf);
727    prevStyle = 0;
728    styles[0] = -1;
729    for (int i=1; i< styleLength; i++) { if (styles[i] > styles[prevStyle]) prevStyle = i; }
730    //fprintf(stderr,"%d\n", prevStyle);
731    if (prevStyle > 0){
732      sprintf(buf, "%d", prevStyle);     xmlSetProp(N_parent, BAD_CAST "style", BAD_CAST buf);
733    }
734  }
735  if (N_parent->children && xmlStrcasecmp(N_parent->children->name,BAD_CAST "line") == 0 && xmlGetProp(N_parent->children,BAD_CAST "alignment") != NULL)
736    xmlSetProp(N_parent, BAD_CAST "alignment", xmlGetProp(N_parent->children,BAD_CAST "alignment"));
737
738   delete[] styles;
739}
740
741void ABWOutputDev::generateParagraphs() {
742  xmlNodePtr N_cur, N_parent, N_p, N_line, N_next;
743  int lvl;
744  //basically I first detect the text-alignment within blocks.
745  //ASSUMPTION: my block seperation thing is good enough so I don't need to
746  //worry about two alignments in one paragraph
747 
748  X1 = 0;
749  X2 = pdfdoc->getPageCropWidth(G_pageNum);
750  Y1 = 0;
751  Y2 = pdfdoc->getPageCropHeight(G_pageNum);
752  addAlignment(N_page);
753 
754  //then it's a switch per alignement
755  N_cur = N_page->children;
756  N_parent = N_page;
757  lvl = 1;
758  while (N_cur) {
759    if (xmlStrcasecmp(N_cur->name,BAD_CAST "chunk") == 0){
760      N_p = xmlNewNode(NULL, BAD_CAST "chunk");
761      xmlAddPrevSibling(N_cur,N_p);
762      //N_p = xmlNewChild(N_parent, NULL, BAD_CAST "chunk", NULL);
763      //A new paragraph is created when:
764      switch (int(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "alignment")))){
765      //left
766      case 1: //the distance between the texblock X2 and the last word X2 is more than
767         //the following first word width.
768         N_line = N_cur->children;
769         while (N_line){
770           N_next = N_line->next;
771           xmlUnlinkNode(N_line);
772           xmlAddChild(N_p,N_line);
773           xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "1");
774           if (N_next && xmlStrcasecmp(N_next->name,BAD_CAST "line") == 0){
775             if (xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")) < (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")) - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")))){
776               N_p = xmlNewNode(NULL, BAD_CAST "chunk");
777               xmlAddPrevSibling(N_cur,N_p);
778             }
779           }
780           N_line = N_next;
781         }
782         break;
783      //right
784      case 2: //the same but now with X1 and first word and following last word
785         N_line = N_cur->children;
786         while (N_line){
787           N_next = N_line->next;
788           xmlUnlinkNode(N_line);
789           xmlAddChild(N_p,N_line);
790           xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "2");
791           if (N_next && xmlStrcasecmp(N_next->name,BAD_CAST "line") == 0){
792             //fprintf(stderr,"width_next=%f, X2_bl=%f, X2_w=%f\n",xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")));
793             if (xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")) < (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")) - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")))){
794               N_p = xmlNewNode(NULL, BAD_CAST "chunk");
795               xmlAddPrevSibling(N_cur,N_p);
796             }
797           }
798           N_line = N_next;
799         }
800         break;
801      //centered
802      case 3: //the combined left and right space is more than the following first word
803         N_line = N_cur->children;
804         while (N_line){
805           N_next = N_line->next;
806           xmlUnlinkNode(N_line);
807           xmlAddChild(N_p,N_line);
808           xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "3");
809           if (N_next && xmlStrcasecmp(N_next->name,BAD_CAST "line") == 0){
810             //fprintf(stderr,"width_next=%f, X2_bl=%f, X2_w=%f\n",xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")),xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")));
811             if (xmlXPathCastStringToNumber(xmlGetProp(N_next->children->children,BAD_CAST "width")) < (xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "width")) - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "width")))){
812               N_p = xmlNewNode(NULL, BAD_CAST "chunk");
813               xmlAddPrevSibling(N_cur,N_p);
814             }
815           }
816           N_line = N_next;
817         }
818         break;
819      //justified
820      case 4:
821         //we break on all alignment=1 lines. A line with alignment=1 that is the first of a block will
822         //also initiate a paragraph break before.
823         N_line = N_cur->children;
824         if (xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "alignment")) == 1){
825           N_p = xmlNewNode(NULL, BAD_CAST "chunk");
826           xmlAddPrevSibling(N_cur,N_p);
827         }
828         while (N_line){
829           N_next = N_line->next;
830           xmlUnlinkNode(N_line);
831           xmlAddChild(N_p,N_line);
832           if (xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "alignment")) == 1){
833             N_p = xmlNewNode(NULL, BAD_CAST "chunk");
834             xmlAddPrevSibling(N_cur,N_p);
835           }
836           xmlSetProp(N_line, BAD_CAST "alignment", BAD_CAST "4");
837           N_line = N_next;
838         }
839         break;
840      }
841    }
842    else if (xmlStrcasecmp(N_cur->name,BAD_CAST "colset") == 0 || xmlStrcasecmp(N_cur->name,BAD_CAST "column") == 0){
843      N_parent = N_cur;
844      N_cur = N_cur->children;
845      lvl++;
846      N_p = xmlNewNode(NULL, BAD_CAST "chunk");
847      xmlAddPrevSibling(N_cur,N_p);
848      continue;
849    }
850    if (N_cur->next)
851      N_cur = N_cur->next;
852    else while (lvl > 0){
853      N_cur = N_parent;
854      N_parent = N_cur->parent;
855      lvl--;
856      if (N_cur->next){
857        N_cur = N_cur->next;
858        break;
859      }
860    }
861    if (lvl==0)
862      N_cur = NULL;
863  }
864}
865
866//function that adds an 'alignment=' property to the <chunk>s
867void ABWOutputDev::addAlignment(xmlNodePtr N_parent) {
868  xmlNodePtr N_chunk, N_line;
869  double tX1, tX2;
870  bool leftMatch, rightMatch, centerMatch;
871  int leftCnt = 0, rightCnt = 0, cntrCnt = 0, justCnt = 0;
872  //fprintf(stderr,"Entering addAlignment\n");
873  for (N_chunk = N_parent->children; N_chunk; N_chunk = N_chunk->next) {
874    if (xmlStrcasecmp(N_chunk->name,BAD_CAST "chunk") == 0){
875      X1 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"));
876      X2 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"));
877      //fprintf(stderr,"Found chunk\n");
878      //if the chunk contains only one line, we don't need to loop through it.
879      if (xmlLsCountNode(N_chunk) == 1){
880        //fprintf(stderr,"Processing line\n");
881        //fprintf(stderr,"X1=%f, X2=%f, cX1=%f, cX2=%f\n",X1,X2,xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")), xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2")));
882        //fprintf(stderr,"%f\n",(xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")) - X1)-(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"))));
883        //fprintf(stderr,"cX1-X1=%f, X2-cX2=%f\n",(xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1")) - X1),(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"))));
884        // a one line chunk, is either centered or left or right-aligned.
885        if ((xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"))-X1)-(X2-xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"))) > 1) {
886          xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "2");
887          xmlNewProp(N_chunk->children, BAD_CAST "alignment", BAD_CAST "2");
888          //fprintf(stderr,"alignment = right\n");
889        }
890        else { 
891        if ((xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"))-X1)-(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2")))< -1) {
892          xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "1");
893          xmlNewProp(N_chunk->children, BAD_CAST "alignment", BAD_CAST "1");
894          //fprintf(stderr,"alignment = left\n");
895        }
896        else {
897          xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "3");
898          xmlNewProp(N_chunk->children, BAD_CAST "alignment", BAD_CAST "3");
899          //fprintf(stderr,"alignment = center\n");
900        }
901        }
902      }
903      else {
904      leftCnt = 0;
905      rightCnt = 0;
906      cntrCnt = 0;
907      justCnt = 0;
908      for (N_line = N_chunk->children; N_line; N_line = N_line->next) {
909        //fprintf(stderr,"Processing line\n");
910        /*
911        |X1 - cX1| == 1
912        |X2 - cX2| == 1
913        |(cX1-X1)-(X2-cX2)| == 1
914        ok, each line can be just as wide as the current set,
915        it can be smaller and moved to the right
916        it can be smaller and moved to the left.
917        it can
918        */
919        //fprintf(stderr,"X1=%f, X2=%f, cX1=%f, cX2=%f\n",X1,X2,xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1")), xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2")));
920        //fprintf(stderr,"cX1-X1=%f, X2-cX2=%f\n",(xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1")) - X1),(X2 - xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2"))));
921        leftMatch =  fabs(xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1"))-X1) < 2;
922        rightMatch =  fabs(X2-xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2"))) < 2;
923        centerMatch =  fabs((xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X1"))-X1)-(X2-xmlXPathCastStringToNumber(xmlGetProp(N_line,BAD_CAST "X2")))) < 2;
924        if (leftMatch && rightMatch) {
925          xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "4");
926          justCnt++;
927        }
928        else if (centerMatch) {
929          xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "3");
930          cntrCnt++;
931        }
932        else if (rightMatch) {
933          xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "2");
934          rightCnt++;
935        }
936        else {
937          xmlNewProp(N_line, BAD_CAST "alignment", BAD_CAST "1");
938          leftCnt++;
939        }
940      }
941      //there is almost always one justified line in a centered text
942      //and most justified blocks have at least one left aligned line
943      //fprintf(stderr,"1:%d ,2:%d ,3:%d ,4:%d\n",leftCnt,justCnt,cntrCnt,rightCnt);
944      if ((leftCnt-1 >= justCnt) && (leftCnt >= rightCnt) && (leftCnt >= cntrCnt))
945        xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "1");
946      else if ((justCnt >= leftCnt-1) && (justCnt >= rightCnt) && (justCnt >= cntrCnt))
947        xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "4");
948      else if ((cntrCnt >= justCnt-1) && (cntrCnt >= rightCnt) && (cntrCnt >= leftCnt))
949        xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "3");
950      else
951        xmlNewProp(N_chunk, BAD_CAST "alignment", BAD_CAST "2");
952      }
953    } 
954    else {
955      if (xmlStrcasecmp(N_chunk->name,BAD_CAST "colset") == 0){
956        //fprintf(stderr,"Found a colset\n");
957        addAlignment(N_chunk);
958      }
959      else {
960        if (xmlStrcasecmp(N_chunk->name,BAD_CAST "column") == 0){
961          //fprintf(stderr,"Found a column\n");
962          tX1 = X1;
963          tX2 = X2;
964          X1 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X1"));
965          X2 = xmlXPathCastStringToNumber(xmlGetProp(N_chunk,BAD_CAST "X2"));
966          addAlignment(N_chunk);
967          X1 = tX1;
968          X2 = tX2;
969        }
970        else { //fprintf(stderr,"Found something else\n");
971        }
972      }
973    }
974  }
975//parse all blocks, and all lines within all blocks
976//do a set of checks and tick a flag if the check fails
977//check for line X1 is textBlock X1
978//check for line X2 is textblock X2
979//check if line is centered in textBock (LX1 != TX1 && LX2 != TX2 && LX1-TX1 == TX2=LX2)
980//if the LX1 != TX1 then how much is the difference?
981//a line isn't left aligned if all lines have a different X1 <= not so strong assumption.
982
983//justified if both are straight except for a couple of (same factor sized) indents at the left
984//else centered if above calculation is correct
985//else left aligned if left side is more straight than right (more lines in the same X1 or common factor
986//else right
987}
988
989void ABWOutputDev::setPDFDoc(PDFDoc *priv_pdfdoc) {
990  pdfdoc = priv_pdfdoc;
991}
992
993void ABWOutputDev::createABW() {
994  //*************************************************************
995  //change styles to abiword format
996  xmlNodePtr N_cur, N_next;
997  xmlAttrPtr N_prop;
998  char buf[500];
999  for (N_cur = N_styleset->children; N_cur; N_cur = N_cur->next){
1000    sprintf(buf,"margin-top:0pt; color:000000; margin-left:0pt; text-position:normal; widows:2; text-indent:0in; font-variant:normal; margin-right:0pt; lang:nl-NL; line-height:1.0; font-size:%dpt; text-decoration:none; margin-bottom:0pt; bgcolor:transparent; text-align:left; font-stretch:normal;",int(xmlXPathCastStringToNumber(xmlGetProp(N_cur,BAD_CAST "size"))));
1001    strncat(buf,"font-family:",12);
1002    strncat(buf,(char *)xmlGetProp(N_cur,BAD_CAST "font"),strlen((char *)xmlGetProp(N_cur,BAD_CAST "font")));
1003    strncat(buf,";",1);
1004    strncat(buf,"font-weight:",12);
1005    strncat(buf,(char *)xmlGetProp(N_cur,BAD_CAST "bold"),strlen((char *)xmlGetProp(N_cur,BAD_CAST "bold")));
1006    strncat(buf,"font-style:",12);
1007    strncat(buf,(char *)xmlGetProp(N_cur,BAD_CAST "italic"),strlen((char *)xmlGetProp(N_cur,BAD_CAST "italic")));
1008    xmlSetProp(N_cur, BAD_CAST "props", BAD_CAST buf);
1009    N_prop = xmlHasProp(N_cur, BAD_CAST "id");
1010    if (N_prop != NULL) xmlRemoveProp(N_prop);
1011    N_prop = xmlHasProp(N_cur, BAD_CAST "size");
1012    if (N_prop != NULL) xmlRemoveProp(N_prop);
1013    N_prop = xmlHasProp(N_cur, BAD_CAST "bold");
1014    if (N_prop != NULL) xmlRemoveProp(N_prop);
1015    N_prop = xmlHasProp(N_cur, BAD_CAST "italic");
1016    if (N_prop != NULL) xmlRemoveProp(N_prop);
1017    N_prop = xmlHasProp(N_cur, BAD_CAST "font");
1018    if (N_prop != NULL) xmlRemoveProp(N_prop);
1019  }
1020  //*************************************************************
1021  //Change the rest of the document
1022  //each child of N_content is a page
1023  N_cur = N_content->children;
1024  while (N_cur){
1025    //we creat a section node and attach it to the root, it will com after all
1026    //the page nodes. Then we transform the page, and finally remove it
1027    N_next = N_cur->next;
1028    //fprintf(stderr,"***Transforming page\n");
1029    N_Block = xmlNewChild(N_root, NULL, BAD_CAST "section", NULL);
1030    transformPage(N_cur);
1031    xmlUnlinkNode(N_cur);
1032    //fprintf(stderr,"***Finished transforming page\n");
1033    N_cur = N_next;
1034  }
1035  cleanUpNode(N_root, false);
1036}
1037
1038void ABWOutputDev::transformPage(xmlNodePtr N_parent){
1039  char buf[60];
1040  xmlNodePtr N_cur, N_curLine, N_curText, N_curWord, text, space;
1041  //translate the nodes into abiword nodes
1042  if (xmlStrcasecmp(N_parent->name,BAD_CAST "page") == 0){
1043    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next){
1044      //fprintf(stderr,"**pass a page child\n");
1045      transformPage(N_cur);
1046    }
1047  }
1048  if (xmlStrcasecmp(N_parent->name,BAD_CAST "chunk") == 0){
1049    //fprintf(stderr,"Found a chunk\n");
1050    //I start a <p> on each chunk and add all word containment
1051    N_text = xmlNewChild(N_Block, NULL, BAD_CAST "p", NULL);
1052    if (int(xmlXPathCastStringToNumber(xmlGetProp(N_parent,BAD_CAST "style"))) > 0){
1053      xmlNewProp(N_text, BAD_CAST "style", xmlGetProp(N_parent,BAD_CAST "style"));
1054    }
1055    switch (int(xmlXPathCastStringToNumber(xmlGetProp(N_parent,BAD_CAST "alignment")))){
1056    case 1: xmlNewProp(N_text, BAD_CAST "props", BAD_CAST "text-align:left");
1057           break;
1058    case 2: xmlNewProp(N_text, BAD_CAST "props", BAD_CAST "text-align:right");
1059           break;
1060    case 3: xmlNewProp(N_text, BAD_CAST "props", BAD_CAST "text-align:center");
1061           break;
1062    case 4: xmlNewProp(N_text, BAD_CAST "props", BAD_CAST "text-align:justify");
1063           break;
1064    }
1065    for (N_curLine = N_parent->children; N_curLine; N_curLine = N_curLine->next){
1066      //fprintf(stderr,"A line\n");
1067      for (N_curText = N_curLine->children; N_curText; N_curText = N_curText->next){
1068        //fprintf(stderr,"a textNode\n");
1069        for (N_curWord = N_curText->children; N_curWord; N_curWord = N_curWord->next){
1070          //fprintf(stderr,"a word\n");
1071          text = N_curWord->children;
1072          xmlUnlinkNode(text);
1073          xmlAddChild(N_text,text);
1074          space = xmlNewText(BAD_CAST " ");
1075          xmlAddChild(N_text,space);
1076        }
1077      }
1078    }
1079  }
1080  if (xmlStrcasecmp(N_parent->name,BAD_CAST "column") == 0){
1081    //fprintf(stderr,"Found a column\n");
1082    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next){
1083      transformPage(N_cur);
1084    }
1085    xmlNewChild(N_text, NULL, BAD_CAST "cbr", NULL);
1086  }
1087  if (xmlStrcasecmp(N_parent->name,BAD_CAST "colset") == 0){
1088    //fprintf(stderr,"Found a colset\n");
1089    //create new section columns: count childNodes of N_cur
1090    //recurse through chunks and create textNodes
1091    N_Block = xmlNewChild(N_root, NULL, BAD_CAST "section", NULL);
1092    sprintf(buf,"columns:%d",xmlLsCountNode(N_parent));
1093    xmlNewProp(N_Block, BAD_CAST "props", BAD_CAST buf);
1094    for (N_cur = N_parent->children; N_cur; N_cur = N_cur->next){
1095      transformPage(N_cur);
1096    }
1097    N_Block = xmlNewChild(N_root, NULL, BAD_CAST "section", NULL);
1098  }
1099  //fprintf(stderr,"at the end\n");
1100}
1101
1102//Count nodes, copied from debugxml.c from libxml
1103// libxml copyright file below
1104/*
1105Except where otherwise noted in the source code (e.g. the files hash.c,
1106list.c and the trio files, which are covered by a similar licence but
1107with different Copyright notices) all the files are:
1108
1109 Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
1110
1111Permission is hereby granted, free of charge, to any person obtaining a copy
1112of this software and associated documentation files (the "Software"), to deal
1113in the Software without restriction, including without limitation the rights
1114to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1115copies of the Software, and to permit persons to whom the Software is fur-
1116nished to do so, subject to the following conditions:
1117
1118The above copyright notice and this permission notice shall be included in
1119all copies or substantial portions of the Software.
1120
1121THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1122IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
1123NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
1124DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
1125IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CON-
1126NECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1127
1128Except as contained in this notice, the name of Daniel Veillard shall not
1129be used in advertising or otherwise to promote the sale, use or other deal-
1130ings in this Software without prior written authorization from him.
1131*/
1132int ABWOutputDev::xmlLsCountNode(xmlNodePtr node) {
1133  int ret = 0;
1134  xmlNodePtr list = NULL;
1135
1136  if (node == NULL)
1137    return(0);
1138
1139  switch (node->type) {
1140    case XML_ELEMENT_NODE:
1141      list = node->children;
1142      break;
1143    case XML_DOCUMENT_NODE:
1144    case XML_HTML_DOCUMENT_NODE:
1145#ifdef LIBXML_DOCB_ENABLED
1146    case XML_DOCB_DOCUMENT_NODE:
1147#endif
1148      list = ((xmlDocPtr) node)->children;
1149      break;
1150    case XML_ATTRIBUTE_NODE:
1151      list = ((xmlAttrPtr) node)->children;
1152      break;
1153    case XML_TEXT_NODE:
1154    case XML_CDATA_SECTION_NODE:
1155    case XML_PI_NODE:
1156    case XML_COMMENT_NODE:
1157      if (node->content != NULL) {
1158        ret = xmlStrlen(node->content);
1159      }
1160      break;
1161    case XML_ENTITY_REF_NODE:
1162    case XML_DOCUMENT_TYPE_NODE:
1163    case XML_ENTITY_NODE:
1164    case XML_DOCUMENT_FRAG_NODE:
1165    case XML_NOTATION_NODE:
1166    case XML_DTD_NODE:
1167    case XML_ELEMENT_DECL:
1168    case XML_ATTRIBUTE_DECL:
1169    case XML_ENTITY_DECL:
1170    case XML_NAMESPACE_DECL:
1171    case XML_XINCLUDE_START:
1172    case XML_XINCLUDE_END:
1173      ret = 1;
1174      break;
1175  }
1176  for (;list != NULL;ret++) 
1177    list = list->next;
1178  return(ret);
1179}
Note: See TracBrowser for help on using the repository browser.