Library: develop Gnuplot code for flexible scatter-regression

The idea is to build the Layout-branching into the generated Gnuplot script,
based on the number of data columns detected. If there is at least one further
data column, then the "multiplot" layout will be used to feature this
additional data in a secondary diagram below with aligned axis;
if more than one additional data column is present, all further
visualisation will draw points, using the secondary Y-axis

Moreover, Gnuplot can calculate the linear regression line itself,
and the drawing will then be done using an `arrow` command,
defining a function regLine(x) based on the linear model.
This commit is contained in:
Fischlurch 2024-04-02 23:59:59 +02:00
parent 13a6bba381
commit c997fc2341
4 changed files with 221 additions and 43 deletions

View file

@ -106,8 +106,11 @@ ${else
set xlabel abscissaName
${end if XLabel
}${if YLabel
}set ylabel '${YLabel}'
${end if YLabel
}set ylabel '${YLabel}' ${end if YLabel
}
${if Yrange}
set yrange [${Yrange}]
${endif
}
set key autotitle columnheader tmargin
@ -122,34 +125,64 @@ plot for [i=2:*] $RunData using 1:i with ${DiagramKind} linestyle i-1
const string GNUPLOT_SCATTER_REGRESSION = R"~(#
#
####---------Scatter-Regression-Plot-------------
#
stats $RunData using 1:2 nooutput
set arrow 1 from graph 0, first 1 to graph 1, first 30 nohead ls 9
set multiplot layout 2,1
set lmargin at screen 0.12
set rmargin at screen 0.88
# draw regression line as arrow
regLine(x) = STATS_slope * x + STATS_intercept
set arrow 1 from graph 0, first regLine(STATS_min_x) \
to graph 1, first regLine(STATS_max_x) \
nohead linestyle 9
plots = STATS_columns - 1
# Adjust layout based on number of data sequences;
# additional sequences placed into secondary diagram
#
if (plots > 1) {
set multiplot layout 2,1 # 2 rows 1 column
set lmargin at screen 0.12 # fixed margins to align diagrams
set rmargin at screen 0.88
}
####-------------------------------
plot $RunData using 1:2 with points linestyle 1
unset arrow 1
unset arrow 10
unset arrow 11
set border 2+8
set yrange [0:8]
set y2range [500:2000]
unset x2label
set format x ""
set ylabel "Y1 axis"
set y2label "Y2 axis" offset -2
set y2tics
plot $RunData using 1:3 with impulses linestyle 3, \
$RunData using 1:4 with points linestyle 5 axes x1y2
if (plots > 1) {
# switch off decorations for secondary diagram
unset arrow 1
unset arrow 10
unset arrow 11
set border 2+8
${if Y2range}
set yrange [${Y2range}]
${endif
} unset x2label
set format x ""
${if Y2label
} set ylabel '${Y2label}' ${endif
}
if (plots <= 2) {
####---------------------------------
plot $RunData using 1:3 with impulses linestyle 3
} else {
# more than one additional data sequence
#
${if Y3range
} set y2range [${Y3range}] ${endif
} set y2tics
${if Y3label
} set y2label '${Y3label}' offset -1 ${endif
}
####---------------------------------------------
plot $RunData using 1:3 with impulses linestyle 3, \
for [i=4:*] $RunData using 1:i with points linestyle 5+(i-4) axes x1y2
}
}
)~";
}//(End)template and defaults definitions
@ -173,4 +206,17 @@ plot $RunData using 1:3 with impulses linestyle 3, \
return plot.render (params.genNode());
}
/**
 * Generate a Gnuplot script for a scatter plot with regression line.
 * The script template is the concatenation of the basic plot definitions
 * and the scatter-regression section; it is rendered against the given
 * parameter record, after binding the style and axis/grid setup fragments.
 * @param params  parameter bindings to substitute into the template
 * @return the rendered template text
 */
string
scatterRegression (ParamRecord params)
{
// template = basic plot definitions followed by the scatter-regression part
TextTemplate plot{GNUPLOT_BASIC_PLOT_DEF
+GNUPLOT_SCATTER_REGRESSION};
// bind the style and axis/grid fragments under the keys "CommonStyleDef" and "AxisGridSetup"
params.set ("CommonStyleDef", GNUPLOT_CommonStyleDef)
.set ("AxisGridSetup", GNUPLOT_AxisGridSetup)
;
return plot.render (params.genNode());
}
}} // namespace lib::gnuplot_gen

View file

@ -67,13 +67,38 @@ namespace gnuplot_gen { ///< preconfigured setup for Gnuplot data visualisation
// Keys to address the parameter bindings within the Gnuplot script templates.
// Each key's value must match the placeholder name used in the template text.
// NOTE(review): the basic plot template visible in this change refers to
//               `XLabel` / `YLabel` -- confirm that "Xlabel" / "Ylabel" is the intended spelling.
const string KEY_CSVData = "CSVData";
const string KEY_DiagramKind = "DiagramKind";
const string KEY_Yrange = "Yrange";
const string KEY_Y2range = "Y2range";
const string KEY_Y3range = "Y3range";
const string KEY_Xlabel = "Xlabel";
const string KEY_Ylabel = "Ylabel";
const string KEY_Y2label = "Y2label";
const string KEY_Y3label = "Y3label";   // was "Y2label" (copy-paste): the template reads ${Y3label} for the secondary Y-axis label
/**
* Generate a Gnuplot diagram to visualise the given data points.
*/
string dataPlot (ParamRecord);
string dataPlot (string csvData) { return dataPlot (ParamRecord().set (KEY_CSVData, csvData)); }
inline string
dataPlot (string csvData)
{
return dataPlot (ParamRecord().set (KEY_CSVData, csvData));
}
/**
 * Generate a (X,Y)-scatter plot with regression line.
 * @param ParamRecord  parameter bindings for the script generation;
 *        the convenience overload below passes the CSV data under key #KEY_CSVData
 * @return the generated script as string
 */
string scatterRegression (ParamRecord);
/** convenience overload: wrap the given CSV data into a ParamRecord and forward */
inline string
scatterRegression (string csvData)
{
return scatterRegression (ParamRecord().set (KEY_CSVData, csvData));
}
}} // namespace lib::gnuplot_gen

View file

@ -55,7 +55,7 @@ namespace test{
run (Arg)
{
simpeUsage();
verify_instantiation();
plot_scatter_regression();
verify_keySubstituton();
verify_conditional();
verify_iteration();
@ -71,17 +71,19 @@ namespace test{
void
simpeUsage()
{
string gnuplot = gnuplot_gen::dataPlot (CSVData{{"step","fib"}
,{{0,1}
,{1,1}
,{2,2}
,{3,3}
,{4,5}
,{5,8}
,{6,13}
,{7,21.55}
}});
cout << gnuplot <<endl;
string gnuplot = gnuplot_gen::dataPlot(
CSVData{{"step","fib"}
,{{0,1}
,{1,1}
,{2,2}
,{3,3}
,{4,5}
,{5,8}
,{6,13}
,{7,21.55}
}});
// cout << gnuplot <<endl;
//Hint: gnuplot -p <scriptfile>
CHECK (contains (gnuplot, "set datafile separator \",;\""));
CHECK (contains (gnuplot, "\"step\",\"fib\""));
@ -92,12 +94,37 @@ namespace test{
/** @test TODO
* @todo WIP 4/24 🔁 define implement
/** @test Create a (x,y) scatter plot with regression line
* - in the simple case, there is only one diagram
* - use the `stats` command to let Gnuplot calculate the linear regression
* - draw a regression line using the `arrow` command
* and a function representing the linear regression model
* @todo WIP 4/24 🔁 define implement
*/
void
verify_instantiation()
plot_scatter_regression()
{
string gnuplot = gnuplot_gen::scatterRegression(
CSVData{{"step","fib"}
,{{0,1}
,{1,1}
,{2,2}
,{3,3}
,{4,5}
,{5,8}
,{6,13}
,{7,21.55}
}});
cout << gnuplot <<endl;
CHECK (contains (gnuplot, "\"step\",\"fib\""));
CHECK (contains (gnuplot, "7,21.55"));
CHECK (contains (gnuplot, "regLine(x) = STATS_slope * x + STATS_intercept"));
CHECK (contains (gnuplot, "set arrow 1 from graph 0, first regLine(STATS_min_x)"));
CHECK (contains (gnuplot, "plot $RunData using 1:2 with points"));
// only one data row given => no multiplot layout
CHECK (not contains (gnuplot, "set multiplot"));
}

View file

@ -112419,9 +112419,10 @@ Date:&#160;&#160;&#160;Thu Apr 20 18:53:17 2023 +0200<br/>
<node COLOR="#435e98" CREATED="1711740518860" ID="ID_227437599" MODIFIED="1711767348734" TEXT="Achsen-Darstellung">
<node COLOR="#338800" CREATED="1711740528570" ID="ID_1468864641" MODIFIED="1711767075148" TEXT="Beschriftung?">
<icon BUILTIN="button_ok"/>
<node CREATED="1711767060332" ID="ID_1406838123" MODIFIED="1711767063608" TEXT="xlabel"/>
<node CREATED="1711767064292" ID="ID_1923875225" MODIFIED="1711767069647" TEXT="ylabel"/>
<node CREATED="1711767070283" ID="ID_1153901506" MODIFIED="1711767072584" TEXT="y2label"/>
<node CREATED="1711767060332" ID="ID_1406838123" MODIFIED="1712103787800" TEXT="XLabel"/>
<node CREATED="1711767064292" ID="ID_1923875225" MODIFIED="1712103791967" TEXT="YLabel"/>
<node CREATED="1711767070283" ID="ID_1153901506" MODIFIED="1712103802787" TEXT="Y2Label"/>
<node CREATED="1712103803738" ID="ID_1900393261" MODIFIED="1712103808437" TEXT="Y3Label"/>
</node>
<node COLOR="#435e98" CREATED="1711740538084" ID="ID_911257771" LINK="#ID_652804746" MODIFIED="1711767042423" TEXT="Grid?">
<icon BUILTIN="help"/>
@ -114497,11 +114498,72 @@ std::cout &lt;&lt; tmpl.render({&quot;what&quot;, &quot;World&quot;}) &lt;&lt; s
</node>
</node>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711821130246" ID="ID_1965869017" MODIFIED="1711821143254" TEXT="Scatter mit Regression">
<node COLOR="#338800" CREATED="1711821144151" ID="ID_944118646" MODIFIED="1712105188845" TEXT="Scatter mit Regression + Zusatzdaten">
<arrowlink COLOR="#fefac9" DESTINATION="ID_1400649622" ENDARROW="Default" ENDINCLINATION="-63;-1;" ID="Arrow_ID_464877044" STARTARROW="None" STARTINCLINATION="-52;53;"/>
<icon BUILTIN="button_ok"/>
<node BACKGROUND_COLOR="#ccb59b" COLOR="#6e2a38" CREATED="1712100595175" ID="ID_1742876349" MODIFIED="1712105207678" TEXT="es ist Test-Support-Code &#x27f9; pragmatische L&#xf6;sung">
<font ITALIC="true" NAME="SansSerif" SIZE="14"/>
<icon BUILTIN="yes"/>
</node>
<node COLOR="#338800" CREATED="1712100586841" ID="ID_1585686206" MODIFIED="1712105195332" TEXT="Flexibilit&#xe4;t schaffen">
<icon BUILTIN="yes"/>
<node CREATED="1712100629386" ID="ID_1231784651" MODIFIED="1712100658602" TEXT="Die Zahl der Datenspalten als Layout-Steuervariable verwenden"/>
<node CREATED="1712100660810" ID="ID_1873990314" MODIFIED="1712100721346" TEXT="User hat daf&#xfc;r zu sorgen, da&#xdf; diese Datenspalten in der ersten Datenzeile gegeben sind">
<richcontent TYPE="NOTE"><html>
<head/>
<body>
<p>
nicht Header-Zeile, also Zeile 2.
</p>
<p>
Einzelne Datenpunkte k&#246;nnen problemlos fehlen....
</p>
</body>
</html></richcontent>
</node>
<node CREATED="1712100729085" ID="ID_1670866548" MODIFIED="1712100853649" TEXT="explizit per Config-Parameter...">
<icon BUILTIN="yes"/>
<node CREATED="1712100772235" ID="ID_372704926" MODIFIED="1712100780714" TEXT="abweichende Canvas-Gr&#xf6;&#xdf;e"/>
<node CREATED="1712100781446" ID="ID_1880691346" MODIFIED="1712100794760" TEXT="Beschriftungen f&#xfc;r die 2. und 3 Y-Achse">
<node CREATED="1711767070283" ID="ID_161076061" MODIFIED="1712103802787" TEXT="Y2Label"/>
<node CREATED="1712103803738" ID="ID_530468043" MODIFIED="1712103808437" TEXT="Y3Label"/>
</node>
<node CREATED="1712100959798" ID="ID_1029187267" MODIFIED="1712103844150" TEXT="Yrange, Y2range, Y3range"/>
</node>
<node CREATED="1712100796037" ID="ID_484740364" MODIFIED="1712100853649" TEXT="Festlegung per Konvention">
<icon BUILTIN="yes"/>
<node CREATED="1712100802603" ID="ID_1229725819" MODIFIED="1712100822770" TEXT="3.Datenreihe &#x27f9; Impuls-Diagramm unten Y1"/>
<node CREATED="1712100823521" ID="ID_1895296139" MODIFIED="1712100836507" TEXT="ab 4.Reihe &#x27f9; Punkt-Diagramm unten auf Y2"/>
<node CREATED="1712100837474" ID="ID_635390821" MODIFIED="1712100847917" TEXT="andere Varianten gibts ned">
<icon BUILTIN="smiley-oh"/>
</node>
</node>
</node>
</node>
<node COLOR="#338800" CREATED="1711821130246" ID="ID_1965869017" MODIFIED="1712105284331" TEXT="Scatter mit Regression">
<icon BUILTIN="button_ok"/>
<node CREATED="1712100915468" ID="ID_1400649622" MODIFIED="1712105174639" TEXT="automatisch aus Vorhergehendem (mit nur 1 Datenreihe)">
<linktarget COLOR="#fefac9" DESTINATION="ID_1400649622" ENDARROW="Default" ENDINCLINATION="-63;-1;" ID="Arrow_ID_464877044" SOURCE="ID_944118646" STARTARROW="None" STARTINCLINATION="-52;53;"/>
<icon BUILTIN="idea"/>
</node>
<node COLOR="#338800" CREATED="1712101781128" ID="ID_304832758" MODIFIED="1712105281865" TEXT="Regressions-Parameter">
<icon BUILTIN="button_ok"/>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1712101788544" ID="ID_703744386" MODIFIED="1712105228379" TEXT="kann man entweder vom User durchgeben lassen">
<icon BUILTIN="flag-yellow"/>
</node>
<node BACKGROUND_COLOR="#eee5c3" COLOR="#990000" CREATED="1711821144151" ID="ID_944118646" MODIFIED="1711821172138" TEXT="Scatter mit Regrssion + Zusatzdaten">
<icon BUILTIN="flag-yellow"/>
<node COLOR="#338800" CREATED="1712101798926" ID="ID_259712004" MODIFIED="1712105225760" TEXT="...oder von Gnuplot selber bestimmen">
<icon BUILTIN="button_ok"/>
</node>
<node CREATED="1712101810700" ID="ID_1562762513" LINK="#ID_1224268144" MODIFIED="1712105224048" TEXT="stats-Kommando stellt auch lineares Modell auf">
<icon BUILTIN="idea"/>
</node>
<node COLOR="#338800" CREATED="1712105236313" ID="ID_445047208" MODIFIED="1712105251309" TEXT="Hierf&#xfc;r Funktion regLine(x) in gnuplot definieren">
<icon BUILTIN="button_ok"/>
</node>
<node COLOR="#338800" CREATED="1712105256166" ID="ID_411655467" MODIFIED="1712105278628" TEXT="&#x27f9; diese dann verwenden f&#xfc;r arrow-command">
<icon BUILTIN="button_ok"/>
</node>
</node>
</node>
</node>
</node>
@ -123747,6 +123809,24 @@ unsigned int ThreadIdAsInt = *static_cast&lt;unsigned int*&gt;(static_cast&lt;vo
<node CREATED="1711742209926" ID="ID_1413844596" LINK="http://gnuplot.info/docs_5.5/loc12019.html" MODIFIED="1711742320361" TEXT="set grid : Grid-Einstellungen">
<linktarget COLOR="#477893" DESTINATION="ID_1413844596" ENDARROW="Default" ENDINCLINATION="-195;19;" ID="Arrow_ID_111743811" SOURCE="ID_849486601" STARTARROW="None" STARTINCLINATION="-177;0;"/>
</node>
<node CREATED="1712101607416" ID="ID_369816002" LINK="http://gnuplot.info/docs_5.5/loc18224.html" MODIFIED="1712101632683" TEXT="stats">
<node CREATED="1712101609695" ID="ID_1655740658" MODIFIED="1712101622609" TEXT="berechnet Statistiken &#xfc;ber ausgew&#xe4;hlte Datenreihen"/>
<node CREATED="1712101634236" ID="ID_10058724" MODIFIED="1712101676181" TEXT="gibt diese auf STDOUT aus &#x2014; es sei denn man setzt &quot;nooutput&quot;"/>
<node CREATED="1712101677793" ID="ID_1814855177" MODIFIED="1712101695129" TEXT="bindet Statistik-Ergebnisse in STATS_variable">
<node CREATED="1712101706074" ID="ID_1869792040" MODIFIED="1712101719558" TEXT="Tip: show variables STATS_">
<icon BUILTIN="idea"/>
</node>
<node CREATED="1712101721824" ID="ID_800388304" MODIFIED="1712101735434" TEXT="geht f&#xfc;r beliebiges Variablen-Pr&#xe4;fix"/>
</node>
<node CREATED="1712101826528" ID="ID_1224268144" MODIFIED="1712101907514" TEXT="lineares Modell">
<icon BUILTIN="idea"/>
<node CREATED="1712101846717" MODIFIED="1712101846717" TEXT="STATS_slope"/>
<node CREATED="1712101854673" MODIFIED="1712101854673" TEXT="STATS_intercept"/>
<node CREATED="1712101860555" MODIFIED="1712101860555" TEXT="STATS_slope_err"/>
<node CREATED="1712101866122" MODIFIED="1712101866122" TEXT="STATS_intercept_err"/>
<node CREATED="1712101877565" MODIFIED="1712101877565" TEXT="STATS_correlation"/>
</node>
</node>
</node>
<node CREATED="1710634777038" ID="ID_600739274" MODIFIED="1710634780193" TEXT="drawing structure">
<node CREATED="1710634781349" ID="ID_577491247" MODIFIED="1710634788480" TEXT="fixed sequence of layers">